diff --git a/PhaseUtils/SLM.py b/PhaseUtils/SLM.py
index 249eb33..fb746df 100644
--- a/PhaseUtils/SLM.py
+++ b/PhaseUtils/SLM.py
@@ -35,6 +35,7 @@ def __init__(self, position: int, name: str = "SLM"):
         cv2.namedWindow(name, cv2.WINDOW_NORMAL)
         shift = None
         for m in screeninfo.get_monitors():
+            print(m)
             if str(position) in m.name:
                 shift = m.x
                 self.resX = m.width
diff --git a/PhaseUtils/contrast.py b/PhaseUtils/contrast.py
index c56a367..5d53ba8 100644
--- a/PhaseUtils/contrast.py
+++ b/PhaseUtils/contrast.py
@@ -5,21 +5,27 @@
 import pickle
 import pyfftw
 import matplotlib.pyplot as plt
-import matplotlib.colors as colors
 from scipy.ndimage import gaussian_filter
-from skimage.restoration import unwrap_phase
-from skimage import filters, measure, morphology
+from skimage import filters, measure, morphology, restoration
 from skimage.segmentation import clear_border, flood
 from scipy import optimize
-import cupy as cp
 from numbalsoda import lsoda_sig, lsoda
 import numba
-from numba import cuda
 import cmath
 import math
 from typing import Any
 import multiprocessing
 
+# cupy available logic
+try:
+    import cupy as cp
+
+    CUPY_AVAILABLE = True
+except ImportError:
+    CUPY_AVAILABLE = False
+if CUPY_AVAILABLE:
+    from numba import cuda
+
 pyfftw.interfaces.cache.enable()
 pyfftw.config.NUM_THREADS = multiprocessing.cpu_count()
 # try to load previous fftw wisdom
@@ -30,6 +36,380 @@
 except FileNotFoundError:
     print("No FFT wisdom found, starting over ...")
 
+if CUPY_AVAILABLE:
+
+    @cuda.jit(fastmath=True)
+    def _az_avg_cp(
+        image: cp.ndarray, prof: cp.ndarray, prof_counts: cp.ndarray, center: tuple
+    ):
+        """Kernel for azimuthal average calculation
+
+        Args:
+            image (cp.ndarray): The image from which to calculate the azimuthal average
+            prof (cp.ndarray): A vector containing the bins
+            prof_counts (cp.ndarray): A vector of same size as prof to count each bin
+        """
+        i, j = numba.cuda.grid(2)
+        if i < image.shape[0] and j < image.shape[1]:
+            dist = round(math.sqrt((i - center[1]) ** 2 + (j - center[0]) ** 2))
+            prof[dist] += image[i, j]
+            prof_counts[dist] += 1
+
+    def az_avg_cp(image: cp.ndarray, center: tuple) -> cp.ndarray:
+        """Calculates the azimuthally averaged radial profile.
+
+        Args:
+            image (cp.ndarray): The 2D image
+            center (tuple): The [x,y] pixel coordinates used as the center. Defaults to None,
+            which then uses the center of the image (including fractional pixels).
+
+        Returns:
+            cp.ndarray: prof the radially averaged profile
+        """
+        # Calculate the indices from the image
+        max_r = max(
+            [
+                cp.hypot(center[0], center[1]),
+                cp.hypot(center[0] - image.shape[1], center[1]),
+                cp.hypot(center[0] - image.shape[1], center[1] - image.shape[0]),
+                cp.hypot(center[0], center[1] - image.shape[0]),
+            ]
+        )
+        r = cp.arange(1, int(max_r) + 1, 1)
+        prof = cp.zeros_like(r, dtype=np.float32)
+        prof_counts = cp.zeros_like(r, dtype=np.float32)
+        tpb = 16
+        bpgx = math.ceil(image.shape[0] / tpb)
+        bpgy = math.ceil(image.shape[1] / tpb)
+        _az_avg_cp[(bpgx, bpgy), (tpb, tpb)](image, prof, prof_counts, center)
+        prof /= prof_counts
+        return prof
+
+    def cache_cp(
+        radius: int,
+        center: tuple = (1024, 1024),
+        out: bool = True,
+        nb_pix: tuple = (2048, 2048),
+    ) -> np.ndarray:
+        """Defines a circular mask
+
+        Args:
+            radius (int): Radius of the mask
+            center (tuple, optional): Center of the mask. Defaults to (1024, 1024).
+            out (bool, optional): Masks the outside of the disk. Defaults to True.
+            nb_pix (tuple, optional): Shape of the mask. Defaults to (2048, 2048).
+
+        Returns:
+            np.ndarray: The array of booleans defining the mask
+        """
+        Y, X = cp.ogrid[: nb_pix[0], : nb_pix[1]]
+        dist_from_center = cp.hypot(X - center[0], Y - center[1])
+
+        if out:
+            mask = dist_from_center <= radius
+        else:
+            mask = dist_from_center > radius
+
+        return mask
+
+    @cuda.jit((numba.complex64[:, :], numba.float32[:, :]), fastmath=True)
+    def angle_fast_cp(x: cp.ndarray, out: cp.ndarray):
+        """Accelerates a smidge angle by using fastmath
+
+        Args:
+            x (np.ndarray): The complex field
+
+        Returns:
+            np.ndarray: the argument of the complex field
+        """
+        i, j = cuda.grid(2)
+        if i < x.shape[0]:
+            if j < x.shape[1]:
+                out[i, j] = cmath.phase(x[i, j])
+
+    def delta_n_cp(
+        im0: cp.ndarray,
+        I0: float,
+        Pf: float,
+        w0: float,
+        d: float,
+        k: float,
+        L: float,
+        alpha: float = 50,
+        plot: bool = False,
+        err: bool = False,
+    ):
+        """Computes the total dephasing of an interferogram and fits the linear
+        loss coefficient alpha, the nonlinear coefficient n2 and the saturation intensity
+        from a single interferogram.
+
+        :param np.ndarray im0: Image to extract Dn
+        :param float I0: Initial intensity
+        :param float Pf: Final power
+        :param float d: Pixel pitch of the image
+        :param float k: wavenumber
+        :param bool plot: Plots a visualization of the analysis result
+        :param bool err: Returns the error
+        :return tuple: phi_tot, (n2, Isat, alpha) with the errors if err is True.
+        """
+        im = cp.copy(im0)
+        im = im / cp.max(im)
+        im_fringe = im_osc_fast_cp(im, cont=False)
+        im_cont = cp.abs(im_fringe)
+        im_cont /= cp.max(im_cont)
+        # ratio of camera sensor surface over whole beam if waist is bigger than the whole camera
+        If = (
+            Pf
+            / (cp.sum(im_cont) * d**2)
+            * (np.pi * w0**2)
+            / (np.prod(im0.shape) * d**2)
+        )
+        # fit Isat
+
+        def fit_function_Isat(inten, alpha, Isat):
+            return I_z(L, inten, alpha, Isat)
+
+        phase = cp.empty_like(im_fringe, dtype=cp.float32)
+        tpb = 16
+        bpg = math.ceil(phase.shape[0] / tpb)
+        angle_fast_cp[(bpg, bpg), (tpb, tpb)](im_fringe, phase)
+        im_cont *= If
+        im_cont = im_cont.get()
+        phase = phase.get()
+        centre_x, centre_y = centre(im_cont)
+        cont_avg = az_avg(im_cont, center=(centre_x, centre_y))
+        phase = restoration.unwrap_phase(phase, wrap_around=True)
+        phi_avg = az_avg(phase, center=(centre_x, centre_y))
+        phi_avg = gaussian_filter(phi_avg, 50)
+        phi_avg = -phi_avg
+        phi_avg -= np.max(phi_avg)
+        cont_fit = cont_avg[np.linspace(0, len(cont_avg) - 1, 100, dtype=int)]
+        phi_fit = phi_avg[np.linspace(0, len(phi_avg) - 1, 100, dtype=int)]
+        dphi = abs(np.max(phi_fit) - np.min(phi_fit))
+        dn_guess = dphi / (k * L)
+        n2_guess = dn_guess / I0
+        # fit input intensity using waist
+        x = np.linspace(0, len(cont_avg) - 1, len(cont_avg)) * d
+        x = x[np.linspace(0, len(x) - 1, 100, dtype=int)]
+        I_in = I0 * np.exp(-(x**2) / w0**2)
+        (alpha, Isat), cov = optimize.curve_fit(
+            fit_function_Isat,
+            I_in,
+            cont_fit,
+            p0=(alpha, 1e4),
+            bounds=[(0, 1e2), (-np.log(1e-9) / L, 1e7)],
+        )
+        alpha_err, Isat_err = np.sqrt(np.diag(cov))
+
+        def fit_phi_vs_I(inten: np.ndarray, n2):
+            return phi_z(L, inten, k, alpha, n2, Isat)
+
+        (n2,), pcov = optimize.curve_fit(
+            fit_phi_vs_I,
+            I_in,
+            phi_fit,
+            bounds=[(-1e-6,), (0,)],
+            p0=(-n2_guess),
+            maxfev=3200,
+        )
+        # gets fitting covariance/error for each parameter
+        n2_err = np.sqrt(np.diag(pcov))[0]
+        phase_tot = np.abs(
+            phi_z(L, np.array([np.max(I_in)]), k, alpha, n2, Isat)
+            - phi_z(L, np.array([1e-10]), k, alpha, n2, Isat)
+        )
+        if plot:
+            fig, ax = plt.subplots(1, 3)
+            i0 = ax[0].imshow(np.abs(im_cont))
+            ax[0].set_title("Dc intensity")
+            fig.colorbar(i0, ax=ax[0])
+            i1 = ax[1].imshow(phase, cmap="viridis")
+            ax[1].set_title("Unwrapped phase")
+            fig.colorbar(i1, ax=ax[1])
+            ax[2].plot(cont_avg * 1e-4, phi_avg, label="Unwrapped phase")
+            lab = f"n2 = {n2:.2e} +/- {n2_err:.2e}\n"
+            lab += f"alpha = {alpha:.2f} +/- {alpha_err:.2f}\n"
+            lab += f"Isat = {Isat*1e-4:.2f} +/- {Isat_err*1e-4:.2f} W/cm²\n"
+            ax[2].plot(
+                I_z(L, I_in, alpha, Isat) * 1e-4, fit_phi_vs_I(I_in, n2), label=lab
+            )
+            ax[2].plot(cont_avg * 1e-4, phi_avg, label="Unwrapped phase filtered")
+            ax[2].set_title("Azimuthal average")
+            ax[2].set_xlabel(r"Az avg output intensity $W/cm^2$")
+            ax[2].set_ylabel("Phase in rad")
+            ax[2].legend()
+            # plt.tight_layout()
+            plt.show()
+        if err:
+            return phase_tot, (n2, Isat, alpha), (n2_err, Isat_err, alpha_err)
+        else:
+            return phase_tot, (n2, Isat, alpha)
+
+    def im_osc_fast_t_cp(
+        im: cp.ndarray, radius: int = None, cont: bool = False, quadran: str = "upper"
+    ) -> cp.ndarray:
+        """Fast field recovery assuming ideal reference angle i.e minimum fringe size of sqrt(2) pixels
+        Truncated for optimal speed
+        Args:
+            im (cp.ndarray): Interferogram
+            radius (int, optional): Radius of filter in px. Defaults to 512.
+            return_cont (bool, optionnal): Returns the continuous part of the field. Defaults to False.
+
+        Returns:
+            cp.ndarray: Recovered field
+        """
+        if radius is None:
+            radius = max(im.shape) // 4
+        # center of first quadran
+        if quadran == "upper":
+            center = (im.shape[0] // 4, im.shape[1] // 4)
+        elif quadran == "lower":
+            center = (im.shape[0] // 4 + im.shape[0] // 2, im.shape[1] // 4)
+        assert len(im.shape) == 2, "Can only work with 2D images !"
+        # center of first quadran
+        im_ifft = cp.zeros((im.shape[0] // 2, im.shape[1] // 2), dtype=np.complex64)
+        im_fft = cp.fft.rfft2(im)
+        Y, X = cp.ogrid[: im_fft.shape[0], : im_fft.shape[1]]
+        dist_from_center = cp.hypot(X - center[1], Y - center[0])
+        mask = dist_from_center > radius
+        if cont:
+            cont_size = int((np.sqrt(2) - 1) * radius)
+            im_ifft_cont = cp.empty(
+                (im.shape[0] // 2, im.shape[1] // 2), dtype=np.complex64
+            )
+            mask_cont = cache_cp(
+                cont_size, out=False, center=(0, 0), nb_pix=im_ifft_cont.shape
+            )
+            mask_cont = cp.logical_xor(
+                mask_cont,
+                cache_cp(
+                    cont_size,
+                    out=False,
+                    center=(0, im_ifft_cont.shape[0]),
+                    nb_pix=im_ifft_cont.shape,
+                ),
+            )
+            im_ifft_cont[0 : im_ifft_cont.shape[0] // 2, :] = im_fft[
+                0 : im_ifft_cont.shape[0] // 2, 0 : im_ifft_cont.shape[1]
+            ]
+            im_ifft_cont[im_ifft_cont.shape[0] // 2 :, :] = im_fft[
+                im_fft.shape[0] - im_ifft_cont.shape[0] // 2 : im_fft.shape[0],
+                0 : im_ifft_cont.shape[1],
+            ]
+            im_ifft_cont[cp.logical_not(mask_cont)] = 0
+            im_cont = cp.fft.ifft2(im_ifft_cont)
+        im_fft[mask] = 0
+        if quadran == "upper":
+            im_ifft[:, :] = im_fft[: im_fft.shape[0] // 2, : im_fft.shape[1] - 1]
+        elif quadran == "lower":
+            im_ifft[:, :] = im_fft[im_fft.shape[0] // 2 :, : im_fft.shape[1] - 1]
+        im_ifft = cp.fft.fftshift(im_ifft)
+        im_ifft = cp.fft.ifft2(im_ifft)
+        im_ifft *= cp.exp(
+            -1j * cp.angle(im_ifft[im_ifft.shape[0] // 2, im_ifft.shape[1] // 2])
+        )
+        if cont:
+            return im_cont, im_ifft
+        return im_ifft
+
+    def im_osc_fast_cp(
+        im: cp.ndarray, radius: int = 0, cont: bool = False
+    ) -> cp.ndarray:
+        """Fast field recovery assuming ideal reference angle
+
+        Args:
+            im (cp.ndarray): Interferogram
+            radius (int, optional): Radius of filter in px. Defaults to 512.
+            return_cont (bool, optionnal): Returns the continuous part of the field. Defaults to False.
+
+        Returns:
+            np.ndarray: Recovered field
+        """
+        if radius == 0:
+            radius = min(im.shape) // 4
+        center = (im.shape[0] // 4, im.shape[1] // 4)
+        assert len(im.shape) == 2, "Can only work with 2D images !"
+        # center of first quadran
+        im_ifft = cp.zeros((im.shape[0], im.shape[1]), dtype=np.complex64)
+        im_fft = cp.fft.rfft2(im)
+        Y, X = cp.ogrid[: im_fft.shape[0], : im_fft.shape[1]]
+        dist_from_center = cp.hypot(X - center[1], Y - center[0])
+        mask = dist_from_center > radius
+        if cont:
+            cont_size = int((np.sqrt(2) - 1) * radius)
+            im_ifft_cont = im_fft.copy()
+            mask_cont = cache_cp(
+                cont_size, out=False, center=(0, 0), nb_pix=im_ifft_cont.shape
+            )
+            mask_cont = cp.logical_xor(
+                mask_cont,
+                cache_cp(
+                    cont_size,
+                    out=False,
+                    center=(0, im_ifft_cont.shape[0]),
+                    nb_pix=im_ifft_cont.shape,
+                ),
+            )
+            im_ifft_cont[cp.logical_not(mask_cont)] = 0
+            im_cont = cp.fft.irfft2(im_ifft_cont)
+        im_fft[mask] = 0
+        im_ifft[
+            im_ifft.shape[0] // 2 - radius : im_ifft.shape[0] // 2 + radius,
+            im_ifft.shape[1] // 2 - radius : im_ifft.shape[1] // 2 + radius,
+        ] = im_fft[
+            center[0] - radius : center[0] + radius,
+            center[1] - radius : center[1] + radius,
+        ]
+        im_ifft = cp.fft.fftshift(im_ifft)
+        im_ifft = cp.fft.ifft2(im_ifft)
+        im_ifft *= np.exp(
+            -1j * cp.angle(im_ifft[im_ifft.shape[0] // 2, im_ifft.shape[1] // 2])
+        )
+        if cont:
+            return im_cont, im_ifft
+        return im_ifft
+
+    def phase_fast_cp(
+        im: cp.ndarray, radius: int = 0, cont: bool = False
+    ) -> cp.ndarray:
+        """Fast phase recovery assuming ideal reference angle
+
+        Args:
+            im (cp.ndarray): Interferogram
+            radius (int, optional): Radius of filter in px. Defaults to 512.
+            return_cont (bool, optionnal): Returns the continuous part of the field. Defaults to False.
+
+        Returns:
+            np.ndarray: Recovered phase
+        """
+        angle = cp.empty((im.shape[0] // 2, im.shape[1] // 2), dtype=np.float32)
+        tpb = 16
+        bpgx = math.ceil(angle.shape[0] / tpb)
+        bpgy = math.ceil(angle.shape[1] / tpb)
+        if cont:
+            im_ifft, im_cont = im_osc_fast_t_cp(im, radius=radius, cont=True)
+            angle_fast_cp[(bpgx, bpgy), (tpb, tpb)](im_ifft, angle)
+            return angle, im_cont
+        im_ifft = im_osc_fast_t_cp(im, radius=radius, cont=False)
+        angle_fast_cp[(bpgx, bpgy), (tpb, tpb)](im_ifft, angle)
+        return angle
+
+
+def contr_fast_cp(im: cp.ndarray) -> cp.ndarray:
+    """Computes the contrast of an interferogram assuming proper alignment
+    i.e minimum fringe size of sqrt(2) pixels
+
+    Args:
+        im (np.ndarray): The interferogram
+
+    Returns:
+        np.ndarray: The contrast map
+    """
+    im_cont, im_fringe = im_osc_fast_cp(im, cont=True)
+    analytic = cp.abs(im_fringe)
+    cont = cp.abs(im_cont)
+    return 2 * analytic / cont
+
 
 def gauss_fit(x, waist, mean):
     """Gaussian BEAM intensity fitting
@@ -79,55 +459,6 @@ def az_avg(image: np.ndarray, center: tuple) -> np.ndarray:
     return prof
 
 
-@cuda.jit(fastmath=True)
-def _az_avg_cp(
-    image: cp.ndarray, prof: cp.ndarray, prof_counts: cp.ndarray, center: tuple
-):
-    """Kernel for azimuthal average calculation
-
-    Args:
-        image (cp.ndarray): The image from which to calculate the azimuthal average
-        prof (cp.ndarray): A vector containing the bins
-        prof_counts (cp.ndarray): A vector of same size as prof to count each bin
-    """
-    i, j = numba.cuda.grid(2)
-    if i < image.shape[0] and j < image.shape[1]:
-        dist = round(math.sqrt((i - center[1]) ** 2 + (j - center[0]) ** 2))
-        prof[dist] += image[i, j]
-        prof_counts[dist] += 1
-
-
-def az_avg_cp(image: cp.ndarray, center: tuple) -> cp.ndarray:
-    """Calculates the azimuthally averaged radial profile.
-
-    Args:
-        image (cp.ndarray): The 2D image
-        center (tuple): The [x,y] pixel coordinates used as the center. Defaults to None,
-        which then uses the center of the image (including fractional pixels).
-
-    Returns:
-        cp.ndarray: prof the radially averaged profile
-    """
-    # Calculate the indices from the image
-    max_r = max(
-        [
-            cp.hypot(center[0], center[1]),
-            cp.hypot(center[0] - image.shape[1], center[1]),
-            cp.hypot(center[0] - image.shape[1], center[1] - image.shape[0]),
-            cp.hypot(center[0], center[1] - image.shape[0]),
-        ]
-    )
-    r = cp.arange(1, int(max_r) + 1, 1)
-    prof = cp.zeros_like(r, dtype=np.float32)
-    prof_counts = cp.zeros_like(r, dtype=np.float32)
-    tpb = 16
-    bpgx = math.ceil(image.shape[0] / tpb)
-    bpgy = math.ceil(image.shape[1] / tpb)
-    _az_avg_cp[(bpgx, bpgy), (tpb, tpb)](image, prof, prof_counts, center)
-    prof /= prof_counts
-    return prof
-
-
 @numba.njit(fastmath=True, cache=True, parallel=True)
 def angle_fast(x: np.ndarray) -> np.ndarray:
     """Accelerates a smidge angle by using fastmath
@@ -145,22 +476,6 @@ def angle_fast(x: np.ndarray) -> np.ndarray:
     return out
 
 
-@cuda.jit((numba.complex64[:, :], numba.float32[:, :]), fastmath=True)
-def angle_fast_cp(x: cp.ndarray, out: cp.ndarray):
-    """Accelerates a smidge angle by using fastmath
-
-    Args:
-        x (np.ndarray): The complex field
-
-    Returns:
-        np.ndarray: the argument of the complex field
-    """
-    i, j = cuda.grid(2)
-    if i < x.shape[0]:
-        if j < x.shape[1]:
-            out[i, j] = cmath.phase(x[i, j])
-
-
 @numba.njit(fastmath=True, nogil=True, cache=True, parallel=True)
 def exp_angle_fast(x: np.ndarray, y: np.ndarray):
     """Fast multiplication by exp(-1j*x)
@@ -313,34 +628,6 @@ def cache(
     return mask
 
 
-def cache_cp(
-    radius: int,
-    center: tuple = (1024, 1024),
-    out: bool = True,
-    nb_pix: tuple = (2048, 2048),
-) -> np.ndarray:
-    """Defines a circular mask
-
-    Args:
-        radius (int): Radius of the mask
-        center (tuple, optional): Center of the mask. Defaults to (1024, 1024).
-        out (bool, optional): Masks the outside of the disk. Defaults to True.
-        nb_pix (tuple, optional): Shape of the mask. Defaults to (2048, 2048).
-
-    Returns:
-        np.ndarray: The array of booleans defining the mask
-    """
-    Y, X = cp.ogrid[: nb_pix[0], : nb_pix[1]]
-    dist_from_center = cp.hypot(X - center[0], Y - center[1])
-
-    if out:
-        mask = dist_from_center <= radius
-    else:
-        mask = dist_from_center > radius
-
-    return mask
-
-
 def im_osc(
     im: np.ndarray,
     cont: bool = False,
@@ -466,221 +753,6 @@ def im_osc(
     return im_fringe
 
 
-def im_osc_center(
-    im: np.ndarray,
-    center: tuple,
-    mask_osc_flood: np.ndarray = None,
-    cont: bool = False,
-    plot: bool = False,
-    big: bool = False,
-) -> tuple:
-    """Separates the continuous and oscillating components of an image using
-    Fourier filtering.
-
-    :param np.ndarray im: Description of parameter `im`.
-    :param tuple center: i,j position of the 1st order
-    :param np.ndarray mask_osc_flood: mask for the 1st order
-    :param bool cont: Returns or not the continuons component
-    :param bool plot: Plots a visualization of the analysis result
-    :return np.ndarray: The oscillating component of the image, or both
-    components
-
-    """
-
-    im_fft = np.fft.fftshift(pyfftw.interfaces.numpy_fft.fft2(im))
-    im_fft_fringe = im_fft.copy()
-    im_fft_cont = im_fft.copy()
-    fft_filt = gaussian_filter(np.abs(im_fft), 1e-3 * im_fft.shape[0])
-    cont_size = 20
-    mask_cont_flood = cache(
-        cont_size,
-        out=False,
-        center=(im.shape[0] // 2, im.shape[1] // 2),
-        nb_pix=im.shape,
-    )
-    if mask_osc_flood is None:
-        dbl_gradient = np.log(
-            np.abs(np.gradient(fft_filt, axis=0))
-            + np.abs(np.gradient(fft_filt, axis=1))
-        )
-        m_value = np.nanmean(dbl_gradient[dbl_gradient != -np.infty])
-        dbl_gradient[np.bitwise_not(mask_cont_flood)] = m_value
-        dbl_gradient_int = dbl_gradient * (dbl_gradient > 0.8 * np.nanmax(dbl_gradient))
-        dbl_gradient_int /= np.nanmax(dbl_gradient_int)
-        dbl_gradient_int = (255 * dbl_gradient_int).astype(np.uint8)
-        threshold = filters.threshold_otsu(dbl_gradient_int)
-        mask = dbl_gradient_int > threshold
-        mask = morphology.remove_small_objects(mask, 1)
-        mask = morphology.remove_small_holes(mask, 1)
-        mask = clear_border(mask)
-        mask = morphology.remove_small_holes(mask, 1, connectivity=2)
-        labels = measure.label(mask)
-        props = measure.regionprops(labels, dbl_gradient_int)
-        # takes the spot with the maximum area
-        areas = [prop.area for prop in props]
-        maxi_area = np.where(areas == max(areas))[0][0]
-        label_osc = props[maxi_area].label
-        contour_osc = measure.find_contours(labels == label_osc, 0.5)[0]
-        y, x = contour_osc.T
-        y = y.astype(int)
-        x = x.astype(int)
-        mask_osc = np.zeros(im_fft.shape)
-        mask_osc[y, x] = 1
-        mask_osc_flood = flood(mask_osc, (y[0] + 1, x[0] + 1), connectivity=1)
-        if big:
-            r_osc = np.max(
-                [
-                    [np.hypot(x[i] - x[j], y[i] - y[j]) for j in range(len(x))]
-                    for i in range(len(x))
-                ]
-            )
-            mask_osc_flood = cache(r_osc, out=False, center=(center, center))
-    im_fft_fringe[mask_osc_flood] = 0
-    im_fft_cont[mask_cont_flood] = 0
-    # bring osc part to center to remove tilt
-    im_fft_fringe = np.roll(
-        im_fft_fringe,
-        (
-            im_fft_fringe.shape[0] // 2 - center[0],
-            im_fft_fringe.shape[1] // 2 - center[1],
-        ),
-        axis=(-2, -1),
-    )
-    im_fringe = pyfftw.interfaces.numpy_fft.ifft2(np.fft.fftshift(im_fft_fringe))
-    im_cont = pyfftw.interfaces.numpy_fft.ifft2(np.fft.fftshift(im_fft_cont))
-    # save FFT wisdom
-    with open("fft.wisdom", "wb") as file:
-        wisdom = pyfftw.export_wisdom()
-        pickle.dump(wisdom, file)
-    if plot:
-        circle = plt.Circle(
-            (im.shape[1] // 2, im.shape[0] // 2), cont_size // 2, color="b", fill=False
-        )
-        fig, ax = plt.subplots(1, 4)
-        im0 = ax[0].imshow(im, cmap="gray")
-        ax[0].set_title("Real space")
-        fig.colorbar(im0, ax=ax[0])
-        im = ax[1].imshow(
-            np.abs(im_fft),
-            norm=colors.SymLogNorm(
-                linthresh=0.03,
-                linscale=0.03,
-                vmin=np.min(np.abs(im_fft)),
-                vmax=np.max(np.abs(im_fft)),
-                base=10,
-            ),
-        )
-        fig.colorbar(im, ax=ax[1])
-        if mask_osc_flood is None:
-            ax[1].plot(x, y, color="r", ls="--")
-        else:
-            ax[1].imshow(mask_osc_flood, alpha=0.35, cmap="gray")
-        ax[1].add_patch(circle)
-        if big and mask_osc_flood is None:
-            circle_big = plt.Circle(
-                (center[1], center[0]), r_osc, color="r", fill=False
-            )
-            ax[1].add_patch(circle_big)
-
-        ax[1].set_title("Fourier space")
-        ax[1].legend(["Oscillating", "Continuous"])
-        im = ax[2].imshow(
-            np.abs(im_fft_fringe),
-            norm=colors.SymLogNorm(
-                linthresh=0.03,
-                linscale=0.03,
-                vmin=np.min(np.abs(im_fft)),
-                vmax=np.max(np.abs(im_fft)),
-                base=10,
-            ),
-        )
-        fig.colorbar(im, ax=ax[2])
-        ax[2].set_title("Filtered Fourier signal")
-        im = ax[3].imshow(np.angle(im_fringe), cmap="twilight")
-        fig.colorbar(im, ax=ax[3])
-        ax[3].set_title("Phase of filtered signal")
-        plt.show()
-    if cont:
-        return im_cont, im_fringe
-    return im_fringe
-
-
-def im_osc_mask(
-    im: np.ndarray, masks: tuple, cont: bool = True, plot: bool = False
-) -> tuple:
-    """Separates the continuous and oscillating components of an image using
-    Fourier filtering.
-
-    :param np.ndarray im: Description of parameter `im`.
-    :param tuple masks: Continuous and oscillating masks
-    :param bool cont: Returns or not the continuons component
-    :param bool plot: Plots a visualization of the analysis result
-    :return np.ndarray: The oscillating component of the image, or both
-    components
-
-    """
-    mask_cont_flood, mask_osc_flood, center_osc = masks
-    center_osc[0] = int(center_osc[0])
-    center_osc[1] = int(center_osc[1])
-    im_fft = np.fft.fftshift(pyfftw.interfaces.numpy_fft.fft2(im))
-    im_fft_fringe = im_fft.copy()
-    im_fft_cont = im_fft.copy()
-    im_fft_fringe[mask_osc_flood] = 0
-    im_fft_cont[mask_cont_flood] = 0
-    # bring osc part to center to remove tilt
-    im_fft_fringe = np.roll(
-        im_fft_fringe,
-        (
-            im_fft_fringe.shape[0] // 2 - center_osc[0],
-            im_fft_fringe.shape[1] // 2 - center_osc[1],
-        ),
-        axis=(-2, -1),
-    )
-    im_fringe = pyfftw.interfaces.numpy_fft.ifft2(np.fft.fftshift(im_fft_fringe))
-    im_cont = pyfftw.interfaces.numpy_fft.ifft2(np.fft.fftshift(im_fft_cont))
-    # save FFT wisdom
-    with open("fft.wisdom", "wb") as file:
-        wisdom = pyfftw.export_wisdom()
-        pickle.dump(wisdom, file)
-    if plot:
-        fig, ax = plt.subplots(1, 4)
-        im0 = ax[0].imshow(im, cmap="gray")
-        ax[0].set_title("Real space")
-        fig.colorbar(im0, ax=ax[0])
-        im = ax[1].imshow(
-            np.abs(im_fft),
-            norm=colors.SymLogNorm(
-                linthresh=0.03,
-                linscale=0.03,
-                vmin=np.min(np.abs(im_fft)),
-                vmax=np.max(np.abs(im_fft)),
-                base=10,
-            ),
-        )
-        ax[1].scatter(center_osc[1], center_osc[0], color="red")
-        fig.colorbar(im, ax=ax[1])
-        ax[1].set_title("Fourier space")
-        im = ax[2].imshow(
-            np.abs(im_fft_fringe),
-            norm=colors.SymLogNorm(
-                linthresh=0.03,
-                linscale=0.03,
-                vmin=np.min(np.abs(im_fft)),
-                vmax=np.max(np.abs(im_fft)),
-                base=10,
-            ),
-        )
-        fig.colorbar(im, ax=ax[2])
-        ax[2].set_title("Filtered Fourier signal")
-        im = ax[3].imshow(np.angle(im_fringe), cmap="twilight")
-        fig.colorbar(im, ax=ax[3])
-        ax[3].set_title("Phase of filtered signal")
-        plt.show()
-    if cont:
-        return im_cont, im_fringe
-    return im_fringe
-
-
 # objective function for Dn vs I fitting
 
 
@@ -836,7 +908,7 @@ def fit_function_Isat(inten, alpha, Isat):
     im_cont *= If
     centre_x, centre_y = centre(im_cont)
     cont_avg = az_avg(im_cont, center=(centre_x, centre_y))
-    phase = unwrap_phase(phase_raw, wrap_around=False)
+    phase = restoration.unwrap_phase(phase_raw, wrap_around=False)
     phi_avg = az_avg(phase, center=(centre_x, centre_y))
     phi_avg = gaussian_filter(phi_avg, 25)
     cont_avg = gaussian_filter(cont_avg, 25)
@@ -923,118 +995,6 @@ def fit_phi_vs_I(inten: np.ndarray, n2: float):
         return phase_tot, (n2, Isat, alpha)
 
 
-def delta_n_cp(
-    im0: cp.ndarray,
-    I0: float,
-    Pf: float,
-    w0: float,
-    d: float,
-    k: float,
-    L: float,
-    alpha: float = 50,
-    plot: bool = False,
-    err: bool = False,
-):
-    """Computes the total dephasing of an interferogram and fits the linear
-    loss coefficient alpha, the nonlinear coefficient n2 and the saturation intensity
-    from a single interferogram.
-
-    :param np.ndarray im0: Image to extract Dn
-    :param float I0: Initial intensity
-    :param float Pf: Final power
-    :param float d: Pixel pitch of the image
-    :param float k: wavenumber
-    :param bool plot: Plots a visualization of the analysis result
-    :param bool err: Returns the error
-    :return tuple: phi_tot, (n2, Isat, alpha) with the errors if err is True.
-    """
-    im = cp.copy(im0)
-    im = im / cp.max(im)
-    im_fringe = im_osc_fast_cp(im, cont=False)
-    im_cont = cp.abs(im_fringe)
-    im_cont /= cp.max(im_cont)
-    # ratio of camera sensor surface over whole beam if waist is bigger than the whole camera
-    If = Pf / (cp.sum(im_cont) * d**2) * (np.pi * w0**2) / (np.prod(im0.shape) * d**2)
-    # fit Isat
-
-    def fit_function_Isat(inten, alpha, Isat):
-        return I_z(L, inten, alpha, Isat)
-
-    phase = cp.empty_like(im_fringe, dtype=cp.float32)
-    tpb = 16
-    bpg = math.ceil(phase.shape[0] / tpb)
-    angle_fast_cp[(bpg, bpg), (tpb, tpb)](im_fringe, phase)
-    im_cont *= If
-    im_cont = im_cont.get()
-    phase = phase.get()
-    centre_x, centre_y = centre(im_cont)
-    cont_avg = az_avg(im_cont, center=(centre_x, centre_y))
-    phase = unwrap_phase(phase, wrap_around=True)
-    phi_avg = az_avg(phase, center=(centre_x, centre_y))
-    phi_avg = gaussian_filter(phi_avg, 50)
-    phi_avg = -phi_avg
-    phi_avg -= np.max(phi_avg)
-    cont_fit = cont_avg[np.linspace(0, len(cont_avg) - 1, 100, dtype=int)]
-    phi_fit = phi_avg[np.linspace(0, len(phi_avg) - 1, 100, dtype=int)]
-    dphi = abs(np.max(phi_fit) - np.min(phi_fit))
-    dn_guess = dphi / (k * L)
-    n2_guess = dn_guess / I0
-    # fit input intensity using waist
-    x = np.linspace(0, len(cont_avg) - 1, len(cont_avg)) * d
-    x = x[np.linspace(0, len(x) - 1, 100, dtype=int)]
-    I_in = I0 * np.exp(-(x**2) / w0**2)
-    (alpha, Isat), cov = optimize.curve_fit(
-        fit_function_Isat,
-        I_in,
-        cont_fit,
-        p0=(alpha, 1e4),
-        bounds=[(0, 1e2), (-np.log(1e-9) / L, 1e7)],
-    )
-    alpha_err, Isat_err = np.sqrt(np.diag(cov))
-
-    def fit_phi_vs_I(inten: np.ndarray, n2):
-        return phi_z(L, inten, k, alpha, n2, Isat)
-
-    (n2,), pcov = optimize.curve_fit(
-        fit_phi_vs_I,
-        I_in,
-        phi_fit,
-        bounds=[(-1e-6,), (0,)],
-        p0=(-n2_guess),
-        maxfev=3200,
-    )
-    # gets fitting covariance/error for each parameter
-    n2_err = np.sqrt(np.diag(pcov))[0]
-    phase_tot = np.abs(
-        phi_z(L, np.array([np.max(I_in)]), k, alpha, n2, Isat)
-        - phi_z(L, np.array([1e-10]), k, alpha, n2, Isat)
-    )
-    if plot:
-        fig, ax = plt.subplots(1, 3)
-        i0 = ax[0].imshow(np.abs(im_cont))
-        ax[0].set_title("Dc intensity")
-        fig.colorbar(i0, ax=ax[0])
-        i1 = ax[1].imshow(phase, cmap="viridis")
-        ax[1].set_title("Unwrapped phase")
-        fig.colorbar(i1, ax=ax[1])
-        ax[2].plot(cont_avg * 1e-4, phi_avg, label="Unwrapped phase")
-        lab = f"n2 = {n2:.2e} +/- {n2_err:.2e}\n"
-        lab += f"alpha = {alpha:.2f} +/- {alpha_err:.2f}\n"
-        lab += f"Isat = {Isat*1e-4:.2f} +/- {Isat_err*1e-4:.2f} W/cm²\n"
-        ax[2].plot(I_z(L, I_in, alpha, Isat) * 1e-4, fit_phi_vs_I(I_in, n2), label=lab)
-        ax[2].plot(cont_avg * 1e-4, phi_avg, label="Unwrapped phase filtered")
-        ax[2].set_title("Azimuthal average")
-        ax[2].set_xlabel(r"Az avg output intensity $W/cm^2$")
-        ax[2].set_ylabel("Phase in rad")
-        ax[2].legend()
-        # plt.tight_layout()
-        plt.show()
-    if err:
-        return phase_tot, (n2, Isat, alpha), (n2_err, Isat_err, alpha_err)
-    else:
-        return phase_tot, (n2, Isat, alpha)
-
-
 def contr(im: np.ndarray) -> np.ndarray:
     """Computes the contrast of an interferogram
 
@@ -1062,41 +1022,12 @@ def phase(
     Returns:
         np.ndarray: The unwrapped phase
     """
-    if masks is not None:
-        im_fringe = im_osc_mask(im, masks, cont=False, plot=plot, big=big)
-    else:
-        im_fringe = im_osc(im, cont=False, plot=plot, big=big)
-    im_phase = unwrap_phase(np.angle(im_fringe))
+    im_fringe = im_osc(im, cont=False, plot=plot, big=big)
+    im_phase = restoration.unwrap_phase(np.angle(im_fringe))
 
     return im_phase
 
 
-def phase_center(
-    im: np.ndarray,
-    center: tuple,
-    mask_osc_flood: np.ndarray = None,
-    plot: bool = False,
-    masks: tuple = None,
-    big: bool = False,
-    unwrap=True,
-) -> np.ndarray:
-    """Returns the phase from an interfogram
-
-    Args:
-        im (np.ndarray): The interferogram
-        plot (bool) : whether to plot something
-
-    Returns:
-        np.ndarray: The unwrapped phase
-    """
-    im_fringe = im_osc_center(
-        im, center, mask_osc_flood=mask_osc_flood, cont=False, plot=plot, big=big
-    )
-    if unwrap:
-        return unwrap_phase(np.angle(im_fringe))
-    return np.angle(im_fringe)
-
-
 def im_osc_fast(
     im: np.ndarray,
     radius: int = 0,
@@ -1311,129 +1242,6 @@ def im_osc_fast_t(
     return im_ifft
 
 
-def im_osc_fast_t_cp(
-    im: cp.ndarray, radius: int = None, cont: bool = False, quadran: str = "upper"
-) -> cp.ndarray:
-    """Fast field recovery assuming ideal reference angle i.e minimum fringe size of sqrt(2) pixels
-    Truncated for optimal speed
-    Args:
-        im (cp.ndarray): Interferogram
-        radius (int, optional): Radius of filter in px. Defaults to 512.
-        return_cont (bool, optionnal): Returns the continuous part of the field. Defaults to False.
-
-    Returns:
-        cp.ndarray: Recovered field
-    """
-    if radius is None:
-        radius = max(im.shape) // 4
-    # center of first quadran
-    if quadran == "upper":
-        center = (im.shape[0] // 4, im.shape[1] // 4)
-    elif quadran == "lower":
-        center = (im.shape[0] // 4 + im.shape[0] // 2, im.shape[1] // 4)
-    assert len(im.shape) == 2, "Can only work with 2D images !"
-    # center of first quadran
-    im_ifft = cp.zeros((im.shape[0] // 2, im.shape[1] // 2), dtype=np.complex64)
-    im_fft = cp.fft.rfft2(im)
-    Y, X = cp.ogrid[: im_fft.shape[0], : im_fft.shape[1]]
-    dist_from_center = cp.hypot(X - center[1], Y - center[0])
-    mask = dist_from_center > radius
-    if cont:
-        cont_size = int((np.sqrt(2) - 1) * radius)
-        im_ifft_cont = cp.empty(
-            (im.shape[0] // 2, im.shape[1] // 2), dtype=np.complex64
-        )
-        mask_cont = cache_cp(
-            cont_size, out=False, center=(0, 0), nb_pix=im_ifft_cont.shape
-        )
-        mask_cont = cp.logical_xor(
-            mask_cont,
-            cache_cp(
-                cont_size,
-                out=False,
-                center=(0, im_ifft_cont.shape[0]),
-                nb_pix=im_ifft_cont.shape,
-            ),
-        )
-        im_ifft_cont[0 : im_ifft_cont.shape[0] // 2, :] = im_fft[
-            0 : im_ifft_cont.shape[0] // 2, 0 : im_ifft_cont.shape[1]
-        ]
-        im_ifft_cont[im_ifft_cont.shape[0] // 2 :, :] = im_fft[
-            im_fft.shape[0] - im_ifft_cont.shape[0] // 2 : im_fft.shape[0],
-            0 : im_ifft_cont.shape[1],
-        ]
-        im_ifft_cont[cp.logical_not(mask_cont)] = 0
-        im_cont = cp.fft.ifft2(im_ifft_cont)
-    im_fft[mask] = 0
-    if quadran == "upper":
-        im_ifft[:, :] = im_fft[: im_fft.shape[0] // 2, : im_fft.shape[1] - 1]
-    elif quadran == "lower":
-        im_ifft[:, :] = im_fft[im_fft.shape[0] // 2 :, : im_fft.shape[1] - 1]
-    im_ifft = cp.fft.fftshift(im_ifft)
-    im_ifft = cp.fft.ifft2(im_ifft)
-    im_ifft *= cp.exp(
-        -1j * cp.angle(im_ifft[im_ifft.shape[0] // 2, im_ifft.shape[1] // 2])
-    )
-    if cont:
-        return im_cont, im_ifft
-    return im_ifft
-
-
-def im_osc_fast_cp(im: cp.ndarray, radius: int = 0, cont: bool = False) -> cp.ndarray:
-    """Fast field recovery assuming ideal reference angle
-
-    Args:
-        im (cp.ndarray): Interferogram
-        radius (int, optional): Radius of filter in px. Defaults to 512.
-        return_cont (bool, optionnal): Returns the continuous part of the field. Defaults to False.
-
-    Returns:
-        np.ndarray: Recovered field
-    """
-    if radius == 0:
-        radius = min(im.shape) // 4
-    center = (im.shape[0] // 4, im.shape[1] // 4)
-    assert len(im.shape) == 2, "Can only work with 2D images !"
-    # center of first quadran
-    im_ifft = cp.zeros((im.shape[0], im.shape[1]), dtype=np.complex64)
-    im_fft = cp.fft.rfft2(im)
-    Y, X = cp.ogrid[: im_fft.shape[0], : im_fft.shape[1]]
-    dist_from_center = cp.hypot(X - center[1], Y - center[0])
-    mask = dist_from_center > radius
-    if cont:
-        cont_size = int((np.sqrt(2) - 1) * radius)
-        im_ifft_cont = im_fft.copy()
-        mask_cont = cache_cp(
-            cont_size, out=False, center=(0, 0), nb_pix=im_ifft_cont.shape
-        )
-        mask_cont = cp.logical_xor(
-            mask_cont,
-            cache_cp(
-                cont_size,
-                out=False,
-                center=(0, im_ifft_cont.shape[0]),
-                nb_pix=im_ifft_cont.shape,
-            ),
-        )
-        im_ifft_cont[cp.logical_not(mask_cont)] = 0
-        im_cont = cp.fft.irfft2(im_ifft_cont)
-    im_fft[mask] = 0
-    im_ifft[
-        im_ifft.shape[0] // 2 - radius : im_ifft.shape[0] // 2 + radius,
-        im_ifft.shape[1] // 2 - radius : im_ifft.shape[1] // 2 + radius,
-    ] = im_fft[
-        center[0] - radius : center[0] + radius, center[1] - radius : center[1] + radius
-    ]
-    im_ifft = cp.fft.fftshift(im_ifft)
-    im_ifft = cp.fft.ifft2(im_ifft)
-    im_ifft *= np.exp(
-        -1j * cp.angle(im_ifft[im_ifft.shape[0] // 2, im_ifft.shape[1] // 2])
-    )
-    if cont:
-        return im_cont, im_ifft
-    return im_ifft
-
-
 def phase_fast(im: np.ndarray, radius: int = 0, cont: bool = False) -> np.ndarray:
     """Fast phase recovery assuming ideal reference angle
 
@@ -1454,30 +1262,6 @@ def phase_fast(im: np.ndarray, radius: int = 0, cont: bool = False) -> np.ndarra
     return phase
 
 
-def phase_fast_cp(im: cp.ndarray, radius: int = 0, cont: bool = False) -> cp.ndarray:
-    """Fast phase recovery assuming ideal reference angle
-
-    Args:
-        im (cp.ndarray): Interferogram
-        radius (int, optional): Radius of filter in px. Defaults to 512.
-        return_cont (bool, optionnal): Returns the continuous part of the field. Defaults to False.
-
-    Returns:
-        np.ndarray: Recovered phase
-    """
-    angle = cp.empty((im.shape[0] // 2, im.shape[1] // 2), dtype=np.float32)
-    tpb = 16
-    bpgx = math.ceil(angle.shape[0] / tpb)
-    bpgy = math.ceil(angle.shape[1] / tpb)
-    if cont:
-        im_ifft, im_cont = im_osc_fast_t_cp(im, radius=radius, cont=True)
-        angle_fast_cp[(bpgx, bpgy), (tpb, tpb)](im_ifft, angle)
-        return angle, im_cont
-    im_ifft = im_osc_fast_t_cp(im, radius=radius, cont=False)
-    angle_fast_cp[(bpgx, bpgy), (tpb, tpb)](im_ifft, angle)
-    return angle
-
-
 def contr_fast(im: np.ndarray) -> np.ndarray:
     """Computes the contrast of an interferogram assuming proper alignment
     i.e minimum fringe size of sqrt(2) pixels
@@ -1492,19 +1276,3 @@ def contr_fast(im: np.ndarray) -> np.ndarray:
     analytic = np.abs(im_fringe)
     cont = np.abs(im_cont)
     return 2 * analytic / cont
-
-
-def contr_fast_cp(im: cp.ndarray) -> cp.ndarray:
-    """Computes the contrast of an interferogram assuming proper alignment
-    i.e minimum fringe size of sqrt(2) pixels
-
-    Args:
-        im (np.ndarray): The interferogram
-
-    Returns:
-        np.ndarray: The contrast map
-    """
-    im_cont, im_fringe = im_osc_fast_cp(im, cont=True)
-    analytic = cp.abs(im_fringe)
-    cont = cp.abs(im_cont)
-    return 2 * analytic / cont
diff --git a/PhaseUtils/velocity.py b/PhaseUtils/velocity.py
index 31e8bce..c8fd394 100644
--- a/PhaseUtils/velocity.py
+++ b/PhaseUtils/velocity.py
@@ -13,11 +13,19 @@
 import numpy as np
 import pyfftw
 import pickle
-import cupy as cp
 import networkx as nx
 import multiprocessing
 from matplotlib import colors
 
+# cupy available logic
+try:
+    import cupy as cp
+
+    CUPY_AVAILABLE = True
+except ImportError:
+    CUPY_AVAILABLE = False
+if CUPY_AVAILABLE:
+    from numba import cuda
 pyfftw.config.NUM_THREADS = multiprocessing.cpu_count()
 pyfftw.config.PLANNER_EFFORT = "FFTW_ESTIMATE"
 pyfftw.interfaces.cache.enable()
@@ -30,6 +38,465 @@
 except FileNotFoundError:
     print("No FFT wisdom found, starting over ...")
 
+if CUPY_AVAILABLE:
+
+    @cuda.jit(fastmath=True)
+    def _az_avg_cp(
+        image: cp.ndarray, prof: cp.ndarray, prof_counts: cp.ndarray, center: tuple
+    ):
+        """Kernel for azimuthal average calculation
+
+        Args:
+            image (cp.ndarray): The image from which to calculate the azimuthal average
+            prof (cp.ndarray): A vector containing the bins
+            prof_counts (cp.ndarray): A vector of same size as prof to count each bin
+        """
+        i, j = numba.cuda.grid(2)
+        if i < image.shape[0] and j < image.shape[1]:
+            dist = round(math.sqrt((i - center[1]) ** 2 + (j - center[0]) ** 2))
+            prof[dist] += image[i, j]
+            prof_counts[dist] += 1
+
+    def az_avg_cp(image: cp.ndarray, center: tuple) -> cp.ndarray:
+        """Calculates the azimuthally averaged radial profile.
+
+        Args:
+            image (cp.ndarray): The 2D image
+            center (tuple): The [x,y] pixel coordinates used as the center.
+            Defaults to None,
+            which then uses the center of the image (including fractional pixels).
+
+        Returns:
+            cp.ndarray: prof the radially averaged profile
+        """
+        # Calculate the indices from the image
+        max_r = max(
+            [
+                cp.hypot(center[0], center[1]),
+                cp.hypot(center[0] - image.shape[1], center[1]),
+                cp.hypot(center[0] - image.shape[1], center[1] - image.shape[0]),
+                cp.hypot(center[0], center[1] - image.shape[0]),
+            ]
+        )
+        r = cp.arange(1, int(max_r) + 1, 1)
+        prof = cp.zeros_like(r, dtype=np.float32)
+        prof_counts = cp.zeros_like(r, dtype=np.float32)
+        tpb = 16
+        bpgx = math.ceil(image.shape[0] / tpb)
+        bpgy = math.ceil(image.shape[1] / tpb)
+        _az_avg_cp[(bpgx, bpgy), (tpb, tpb)](image, prof, prof_counts, center)
+        prof /= prof_counts
+        return prof
+
+    @cuda.jit(fastmath=True)
+    def phase_sum_cp(velo: cp.ndarray, cont: cp.ndarray, r: int):
+        """Computes the phase gradient winding in place with a plaquette radius r
+
+        Args:
+            velo (cp.ndarray): Velocity array induced from the phase.
+            velo[0, :, :] is d/dy phi (derivative along rows).
+            cont (cp.ndarray): output array
+            r (int): Radius of the plaquette circulation computation
+        Returns:
+            None
+        """
+        i, j = numba.cuda.grid(2)
+        if i < velo.shape[1] and j < velo.shape[2]:
+            # center of the plaquette
+            ii = (i + r // 2) % velo.shape[-2]
+            jj = (j + r // 2) % velo.shape[-1]
+            for k in range(0, r + 1):
+                cont[ii, jj] += velo[0, i, (j + k) % velo.shape[-1]]
+                cont[ii, jj] -= velo[
+                    0, (i + r) % velo.shape[-2], (j + k) % velo.shape[-1]
+                ]
+                cont[ii, jj] += velo[
+                    1, (i + k) % velo.shape[-2], (j + r) % velo.shape[-1]
+                ]
+                cont[ii, jj] -= velo[1, (i + k) % velo.shape[-2], j]
+
+    def velocity_cp(phase: cp.ndarray, dx: float = 1) -> cp.ndarray:
+        """Returns the velocity from the phase
+
+        Args:
+            phase (np.ndarray): The field phase
+            dx (float, optional): the pixel size in m. Defaults to 1 (adimensional).
+
+        Returns:
+            np.ndarray: The velocity field [vx, vy]
+        """
+        # 1D unwrap
+        phase_unwrap = cp.empty((2, phase.shape[0], phase.shape[1]), dtype=np.float32)
+        phase_unwrap[0, :, :] = cp.unwrap(phase, axis=1)
+        phase_unwrap[1, :, :] = cp.unwrap(phase, axis=0)
+        # gradient reconstruction
+        velo = cp.empty((2, phase.shape[0], phase.shape[1]), dtype=np.float32)
+        velo[0, :, :] = cp.gradient(phase_unwrap[0, :, :], dx, axis=1)
+        velo[1, :, :] = cp.gradient(phase_unwrap[1, :, :], dx, axis=0)
+        return velo
+
+    def velocity_fft_cp(phase: cp.ndarray, dx: float = 1) -> cp.ndarray:
+        """Returns the velocity from the phase using an fft to compute
+        the gradient
+
+        Args:
+            phase (cp.ndarray): The field phase
+            dx (float, optional): the pixel size in m. Defaults to 1 (adimensional).
+
+        Returns:
+            cp.ndarray: The velocity field [vx, vy]
+        """
+        # 1D unwrap
+        phase_unwrap = cp.empty((2, phase.shape[-2], phase.shape[-1]), dtype=np.float32)
+        phase_unwrap[0, :, :] = cp.unwrap(phase, axis=-1)
+        phase_unwrap[1, :, :] = cp.unwrap(phase, axis=-2)
+        # prepare K matrix
+        kx = 2 * np.pi * cp.fft.fftfreq(phase.shape[-1], dx)
+        ky = 2 * np.pi * cp.fft.fftfreq(phase.shape[-2], dx)
+        K = cp.array(cp.meshgrid(kx, ky))
+        # gradient reconstruction
+        velo = cp.empty((2, phase.shape[-2], phase.shape[-1]), dtype=np.float32)
+        velo = cp.fft.irfft2(
+            1j * K[:, :, 0 : K.shape[-1] // 2 + 1] * cp.fft.rfft2(phase_unwrap)
+        )
+        return velo
+
+    def helmholtz_decomp_cp(
+        field: np.ndarray, plot: bool = False, dx: float = 1, regularize: bool = True
+    ) -> tuple:
+        """Decomposes a phase picture into compressible and incompressible velocities
+
+        Args:
+            field (np.ndarray): 2D array of the field
+            plot (bool, optional): Final plots. Defaults to True.
+            dx (float, optional): Spatial sampling size in m. Defaults to 1.
+            regularize (bool, optional): Whether to multiply speed by the amplitude or not.
+        Returns:
+            tuple: (velo, v_incc, v_comp) a tuple containing the velocity field,
+            the incompressible velocity and compressible velocity.
+        """
+        sy, sx = field.shape
+        # meshgrid in k space
+        kx = 2 * np.pi * cp.fft.rfftfreq(sx, d=dx)
+        ky = 2 * np.pi * cp.fft.fftfreq(sy, d=dx)
+        K = cp.array(cp.meshgrid(kx, ky))
+        if regularize:
+            velo = cp.abs(field) * velocity_cp(cp.angle(field))
+        else:
+            velo = velocity_cp(cp.angle(field))
+        v_tot = cp.hypot(velo[0], velo[1])
+        V_k = cp.fft.rfft2(velo)
+        # Helmohltz decomposition fot the compressible part
+        V_comp = -1j * cp.sum(V_k * K, axis=0) / ((cp.sum(K**2, axis=0)) + 1e-15)
+        v_comp = cp.fft.irfft2(1j * V_comp * K)
+        # Helmohltz decomposition fot the incompressible part
+        v_inc = velo - v_comp
+        if plot:
+            flow_inc = cp.hypot(v_inc[0], v_inc[1])
+            flow_comp = cp.hypot(v_comp[0], v_comp[1])
+            YY, XX = np.indices(flow_comp.shape)
+            fig, ax = plt.subplots(2, 2, figsize=[12, 9])
+            im0 = ax[0, 0].imshow(v_tot.get())
+            ax[0, 0].set_title(r"$|v^{tot}|$")
+            ax[0, 0].set_xlabel("x")
+            ax[0, 0].set_ylabel("y")
+            fig.colorbar(im0, ax=ax[0, 0])
+
+            im1 = ax[0, 1].imshow(flow_inc.get())
+            ax[0, 1].set_title(r"$|v^{inc}|$")
+            ax[0, 1].set_xlabel("x")
+            ax[0, 1].set_ylabel("y")
+            fig.colorbar(im1, ax=ax[0, 1])
+
+            im2 = ax[1, 0].imshow(flow_comp.get())
+            ax[1, 0].streamplot(
+                XX,
+                YY,
+                v_comp[0].get(),
+                v_comp[1].get(),
+                density=2.5,
+                color="white",
+                linewidth=1,
+            )
+            ax[1, 0].set_title(r"$|v^{comp}|$")
+            ax[1, 0].set_xlabel("x")
+            ax[1, 0].set_ylabel("y")
+            fig.colorbar(im2, ax=ax[1, 0])
+
+            # flows are calculated by streamplot
+            im3 = ax[1, 1].imshow(flow_inc.get(), cmap="viridis")
+            ax[1, 1].streamplot(
+                XX,
+                YY,
+                v_inc[0].get(),
+                v_inc[1].get(),
+                density=2.5,
+                color="white",
+                linewidth=1,
+            )
+            ax[1, 1].set_title(r"$v^{inc}$")
+            ax[1, 1].set_xlabel("x")
+            ax[1, 1].set_ylabel("y")
+            fig.colorbar(im3, ax=ax[1, 1], label=r"$|v^{inc}|$")
+            plt.show()
+        return velo, v_inc, v_comp
+
+    def energy_cp(ucomp: cp.ndarray, uinc: cp.ndarray) -> tuple:
+        """Computes the total energy contained in the given compressible
+        and incompressible velocities
+
+        Args:
+            ucomp (np.ndarray): Compressible velocity field
+            uinc (np.ndarray): Incompressible velocity field
+
+        Returns:
+            (Ucc, Uii): The total compressible and incompressible energies
+        """
+        # compressible
+        Uc = cp.abs(cp.fft.rfft2(ucomp)) ** 2
+        Ucc = cp.sum(Uc)
+
+        # incompressible
+        Ui = cp.abs(cp.fft.rfft2(uinc)) ** 2
+        Uii = cp.sum(Ui)
+
+        return Ucc, Uii
+
+    def energy_spectrum_cp(ucomp: cp.ndarray, uinc: cp.ndarray) -> cp.ndarray:
+        """Computes the compressible and incompressible energy spectra
+        using the Fourier transform of the velocity fields
+
+        Args:
+            ucomp (np.ndarray): Compressible velocity field
+            uinc (np.ndarray): Incompressible velocity field
+
+        Returns:
+            (Ucc, Uii) np.ndarray: The array containing the compressible / incompressible
+            energies as a function of the wavevector k
+        """
+
+        # compressible
+        Ux_c = cp.abs(cp.fft.fftshift(cp.fft.fft2(ucomp[0])))
+        Uy_c = cp.abs(cp.fft.fftshift(cp.fft.fft2(ucomp[1])))
+        Uc = Ux_c**2 + Uy_c**2
+        Ucc = az_avg_cp(Uc, center=(Uc.shape[1] // 2, Uc.shape[0] // 2))
+
+        # incompressible
+        Ux_i = cp.abs(cp.fft.fftshift(cp.fft.fft2(uinc[0])))
+        Uy_i = cp.abs(cp.fft.fftshift(cp.fft.fft2(uinc[1])))
+        Ui = Ux_i**2 + Uy_i**2
+        Uii = az_avg_cp(Ui, center=(Ui.shape[1] // 2, Ui.shape[0] // 2))
+        return Ucc, Uii
+
+    def vortex_detection_cp(
+        phase: cp.ndarray, plot: bool = False, r: int = 1
+    ) -> cp.ndarray:
+        """Detects the vortex positions using circulation calculation
+
+        Args:
+            phase (np.ndarray): Phase field.
+            plot (bool, optional): Whether to plot the result or not. Defaults to True.
+            r (int or list, optionnal): Radius of the plaquette. Defaults to 1.
+            If the radius is a list, will compute the winding for each radius and then
+            compare the results for each radius by taking the logical AND between the
+            vortices found at each radius.
+
+        Returns:
+            np.ndarray: A list of the vortices position and charge
+        """
+        velo = velocity_cp(phase)
+        if isinstance(r, int):
+            if r > 1:
+                windings = cp.zeros(
+                    (r, phase.shape[-2], phase.shape[-1]), dtype=np.float32
+                )
+            else:
+                windings = cp.zeros_like(velo[0], dtype=np.float32)
+        elif isinstance(r, list):
+            windings = cp.zeros(
+                (len(r), phase.shape[-2], phase.shape[-1]), dtype=np.float32
+            )
+        else:
+            windings = cp.zeros_like(velo[0], dtype=np.float32)
+        tpb = 32
+        bpgx = math.ceil(phase.shape[0] / tpb)
+        bpgy = math.ceil(phase.shape[1] / tpb)
+        if isinstance(r, int):
+            if r > 1:
+                for ir in range(r):
+                    phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](
+                        velo, windings[ir, :, :], ir + 1
+                    )
+                cond_plus = windings > 2 * np.pi
+                cond_plus = cond_plus.all(axis=0)
+                cond_minus = windings < -2 * np.pi
+                cond_minus = cond_minus.all(axis=0)
+            else:
+                phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings, r)
+                cond_plus = windings > 2 * np.pi
+                cond_minus = windings < -2 * np.pi
+
+        elif isinstance(r, list):
+            for ir, rr in enumerate(r):
+                phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings[ir, :, :], rr)
+            cond_plus = windings > 2 * np.pi
+            cond_plus = cond_plus.all(axis=0)
+            cond_minus = windings < -2 * np.pi
+            cond_minus = cond_minus.all(axis=0)
+
+        else:
+            phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings, r)
+            cond_plus = windings > 2 * np.pi
+            cond_minus = windings < -2 * np.pi
+        plus_y, plus_x = cp.where(cond_plus)
+        minus_y, minus_x = cp.where(cond_minus)
+        vortices = cp.zeros((len(plus_x) + len(minus_x), 3), dtype=np.float32)
+        vortices[0 : len(plus_x), 0] = plus_x
+        vortices[0 : len(plus_x), 1] = plus_y
+        vortices[0 : len(plus_x), 2] = 1
+        vortices[len(plus_x) :, 0] = minus_x
+        vortices[len(plus_x) :, 1] = minus_y
+        vortices[len(plus_x) :, 2] = -1
+        if plot:
+            if windings.ndim == 3:
+                windings = windings.mean(axis=0)
+            fig, ax = plt.subplots(1, 2, figsize=[8, 4])
+            im0 = ax[0].imshow(phase.get(), cmap="twilight_shifted")
+            im1 = ax[1].imshow(
+                windings.get(), cmap="seismic", norm=colors.CenteredNorm(vcenter=0)
+            )
+            ax[0].scatter(
+                vortices[:, 0].get(),
+                vortices[:, 1].get(),
+                c=vortices[:, 2].get(),
+                cmap="bwr",
+            )
+            fig.colorbar(im0, ax=ax[0], shrink=0.5, label="Vorticity")
+            fig.colorbar(im1, ax=ax[1], shrink=0.5, label="Winding")
+            plt.show()
+        return vortices
+
+    @cuda.jit(cache=True, fastmath=True)
+    def _distance_matrix(dist: cp.ndarray, x: cp.ndarray, y: cp.ndarray):
+        """Compute distance matrix using CUDA
+
+        Args:
+            x (cp.ndarray): Nd array of points
+            y (cp.ndarray): Nd array of points
+        """
+        i, j = numba.cuda.grid(2)
+        if i < x.shape[0] and j < y.shape[0]:
+            if j >= i:
+                dist[i, j] += math.sqrt(
+                    (x[i, 0] - y[j, 0]) ** 2 + (x[i, 1] - y[j, 1]) ** 2
+                )
+                dist[j, i] = dist[i, j]
+
+    @cuda.jit(cache=True, fastmath=True)
+    def _build_condition(condition: cp.ndarray, dist: cp.ndarray, bins: cp.ndarray):
+        """Constructs the array that represents the vortices pair i, j to consider
+        in the bin k.
+
+        Args:
+            condition (cp.ndarray): Boolean array of shape (k, i, j) where k is an index
+            running in the number of bins, i and j in the number of vortices.
+            dist (cp.ndarray): Distance matrix where D_ij is the distance between the
+            vortex i and j.
+            bins (cp.ndarray): The disk shells of radius r and width d within which we
+            compute the correlations between a vortex and all vortices lying in a bin.
+        """
+        i, j, k = numba.cuda.grid(3)
+        if i < condition.shape[0] and j < condition.shape[1] and k < len(bins):
+            condition[k - 1, i, j] = dist[i, j] > bins[k - 1]
+            condition[k - 1, i, j] &= dist[i, j] < bins[k]
+
+    @cuda.jit(cache=True, fastmath=True)
+    def _correlate(
+        corr: cp.ndarray, vortices: cp.ndarray, bins: cp.ndarray, condition: cp.ndarray
+    ):
+        """Compute the actual correlation function
+
+        Args:
+            corr (cp.ndarray): Output array
+            vortices (cp.ndarray): Vortices array where v_i = (x, y, l)
+            bins (cp.ndarray): Disk shells in which to consider vortices for the correlation
+            calculation
+            condition (cp.ndarray): Which vortices to consider
+        """
+        d = bins[1] - bins[0]
+        i, j, k = numba.cuda.grid(3)
+        if i < condition.shape[0] and j < condition.shape[1] and k < len(bins):
+            if condition[k - 1, i, j]:
+                r = abs(bins[k] - d / 2)
+                corr[k - 1] += (
+                    1
+                    / (2 * np.pi * r * d * vortices.shape[0])
+                    * vortices[i, 2]
+                    * vortices[j, 2]
+                )
+
+    def pair_correlations_cp(vortices: cp.ndarray, bins: cp.ndarray) -> cp.ndarray:
+        """Computes the pair correlation function for a given vortex array.
+        See PHYSICAL REVIEW E 95, 052144 (2017) eq.12
+
+        Args:
+            vortices (np.ndarray): Vortices array
+            bins (np.ndarray): bins of distance in which to compute the
+            correlation function
+
+        Returns:
+            np.ndarray: The correlation function of length len(bins)
+        """
+        corr = cp.zeros(len(bins) - 1)
+        # compute distance matrix of vortices
+        dist_matrix = cp.zeros((vortices.shape[0], vortices.shape[0]), dtype=np.float32)
+        tpb = 32
+        bpgx = math.ceil(dist_matrix.shape[0] / tpb)
+        bpgy = math.ceil(dist_matrix.shape[1] / tpb)
+        _distance_matrix[(bpgx, bpgy), (tpb, tpb)](
+            dist_matrix, vortices[:, 0:2], vortices[:, 0:2]
+        )
+        condition = cp.zeros(
+            (len(bins), dist_matrix.shape[0], dist_matrix.shape[1]), dtype=np.bool8
+        )
+        tpb = 16
+        tpbz = 4
+        bpgx = math.ceil(dist_matrix.shape[0] / tpb)
+        bpgy = math.ceil(dist_matrix.shape[1] / tpb)
+        bpgz = math.ceil(len(bins / tpb))
+        _build_condition[(bpgx, bpgy, bpgz), (tpb, tpb, tpbz)](
+            condition, dist_matrix, bins
+        )
+        _correlate[(bpgx, bpgy, bpgz), (tpb, tpb, tpbz)](
+            corr, vortices, bins, condition
+        )
+        return corr
+
+    def drag_force_cp(psi: cp.ndarray, U: cp.ndarray) -> np.ndarray:
+        """Computes the drag force considering an obstacle map U(r)
+        and an intensity map I(r)
+
+        Args:
+            psi (cp.ndarray): Intensity map
+            U (cp.ndarray): Potential map
+
+        Returns:
+            fx, fy (np.ndarray): The drag force in a.u
+        """
+        if U.dtype == np.complex64:
+            U = cp.real(U)
+        gradx = cp.gradient(U, axis=-1)
+        grady = cp.gradient(U, axis=-2)
+        fx = cp.sum(-gradx * psi, axis=(-2, -1))
+        fy = cp.sum(-grady * psi, axis=(-2, -1))
+        if psi.ndim == 3:
+            f = np.zeros((psi.shape[0], 2))
+            f[:, 0] = fx.get()
+            f[:, 1] = fy.get()
+            return f
+        else:
+            return np.array([fx.get(), fy.get()])
+
 
 @numba.njit(parallel=True, cache=True, fastmath=True, boundscheck=False)
 def az_avg(image: np.ndarray, center: tuple) -> np.ndarray:
@@ -64,56 +531,6 @@ def az_avg(image: np.ndarray, center: tuple) -> np.ndarray:
     return prof
 
 
-@cuda.jit(fastmath=True)
-def _az_avg_cp(
-    image: cp.ndarray, prof: cp.ndarray, prof_counts: cp.ndarray, center: tuple
-):
-    """Kernel for azimuthal average calculation
-
-    Args:
-        image (cp.ndarray): The image from which to calculate the azimuthal average
-        prof (cp.ndarray): A vector containing the bins
-        prof_counts (cp.ndarray): A vector of same size as prof to count each bin
-    """
-    i, j = numba.cuda.grid(2)
-    if i < image.shape[0] and j < image.shape[1]:
-        dist = round(math.sqrt((i - center[1]) ** 2 + (j - center[0]) ** 2))
-        prof[dist] += image[i, j]
-        prof_counts[dist] += 1
-
-
-def az_avg_cp(image: cp.ndarray, center: tuple) -> cp.ndarray:
-    """Calculates the azimuthally averaged radial profile.
-
-    Args:
-        image (cp.ndarray): The 2D image
-        center (tuple): The [x,y] pixel coordinates used as the center.
-        Defaults to None,
-        which then uses the center of the image (including fractional pixels).
-
-    Returns:
-        cp.ndarray: prof the radially averaged profile
-    """
-    # Calculate the indices from the image
-    max_r = max(
-        [
-            cp.hypot(center[0], center[1]),
-            cp.hypot(center[0] - image.shape[1], center[1]),
-            cp.hypot(center[0] - image.shape[1], center[1] - image.shape[0]),
-            cp.hypot(center[0], center[1] - image.shape[0]),
-        ]
-    )
-    r = cp.arange(1, int(max_r) + 1, 1)
-    prof = cp.zeros_like(r, dtype=np.float32)
-    prof_counts = cp.zeros_like(r, dtype=np.float32)
-    tpb = 16
-    bpgx = math.ceil(image.shape[0] / tpb)
-    bpgy = math.ceil(image.shape[1] / tpb)
-    _az_avg_cp[(bpgx, bpgy), (tpb, tpb)](image, prof, prof_counts, center)
-    prof /= prof_counts
-    return prof
-
-
 @numba.njit(
     numba.float32[:, :](numba.float32[:, :, :], numba.int64),
     fastmath=True,
@@ -149,30 +566,6 @@ def phase_sum(velo: np.ndarray, r: int = 1) -> np.ndarray:
     return cont
 
 
-@cuda.jit(fastmath=True)
-def phase_sum_cp(velo: cp.ndarray, cont: cp.ndarray, r: int):
-    """Computes the phase gradient winding in place with a plaquette radius r
-
-    Args:
-        velo (cp.ndarray): Velocity array induced from the phase.
-        velo[0, :, :] is d/dy phi (derivative along rows).
-        cont (cp.ndarray): output array
-        r (int): Radius of the plaquette circulation computation
-    Returns:
-        None
-    """
-    i, j = numba.cuda.grid(2)
-    if i < velo.shape[1] and j < velo.shape[2]:
-        # center of the plaquette
-        ii = (i + r // 2) % velo.shape[-2]
-        jj = (j + r // 2) % velo.shape[-1]
-        for k in range(0, r + 1):
-            cont[ii, jj] += velo[0, i, (j + k) % velo.shape[-1]]
-            cont[ii, jj] -= velo[0, (i + r) % velo.shape[-2], (j + k) % velo.shape[-1]]
-            cont[ii, jj] += velo[1, (i + k) % velo.shape[-2], (j + r) % velo.shape[-1]]
-            cont[ii, jj] -= velo[1, (i + k) % velo.shape[-2], j]
-
-
 def velocity(phase: np.ndarray, dx: float = 1) -> np.ndarray:
     """Returns the velocity from the phase
 
@@ -194,27 +587,6 @@ def velocity(phase: np.ndarray, dx: float = 1) -> np.ndarray:
     return velo
 
 
-def velocity_cp(phase: cp.ndarray, dx: float = 1) -> cp.ndarray:
-    """Returns the velocity from the phase
-
-    Args:
-        phase (np.ndarray): The field phase
-        dx (float, optional): the pixel size in m. Defaults to 1 (adimensional).
-
-    Returns:
-        np.ndarray: The velocity field [vx, vy]
-    """
-    # 1D unwrap
-    phase_unwrap = cp.empty((2, phase.shape[0], phase.shape[1]), dtype=np.float32)
-    phase_unwrap[0, :, :] = cp.unwrap(phase, axis=1)
-    phase_unwrap[1, :, :] = cp.unwrap(phase, axis=0)
-    # gradient reconstruction
-    velo = cp.empty((2, phase.shape[0], phase.shape[1]), dtype=np.float32)
-    velo[0, :, :] = cp.gradient(phase_unwrap[0, :, :], dx, axis=1)
-    velo[1, :, :] = cp.gradient(phase_unwrap[1, :, :], dx, axis=0)
-    return velo
-
-
 def velocity_fft(phase: np.ndarray, dx: float = 1) -> np.ndarray:
     """Returns the velocity from the phase using an fft to compute
     the gradient
@@ -241,33 +613,6 @@ def velocity_fft(phase: np.ndarray, dx: float = 1) -> np.ndarray:
     return velo
 
 
-def velocity_fft_cp(phase: cp.ndarray, dx: float = 1) -> cp.ndarray:
-    """Returns the velocity from the phase using an fft to compute
-    the gradient
-
-    Args:
-        phase (cp.ndarray): The field phase
-        dx (float, optional): the pixel size in m. Defaults to 1 (adimensional).
-
-    Returns:
-        cp.ndarray: The velocity field [vx, vy]
-    """
-    # 1D unwrap
-    phase_unwrap = cp.empty((2, phase.shape[-2], phase.shape[-1]), dtype=np.float32)
-    phase_unwrap[0, :, :] = cp.unwrap(phase, axis=-1)
-    phase_unwrap[1, :, :] = cp.unwrap(phase, axis=-2)
-    # prepare K matrix
-    kx = 2 * np.pi * cp.fft.fftfreq(phase.shape[-1], dx)
-    ky = 2 * np.pi * cp.fft.fftfreq(phase.shape[-2], dx)
-    K = cp.array(cp.meshgrid(kx, ky))
-    # gradient reconstruction
-    velo = cp.empty((2, phase.shape[-2], phase.shape[-1]), dtype=np.float32)
-    velo = cp.fft.irfft2(
-        1j * K[:, :, 0 : K.shape[-1] // 2 + 1] * cp.fft.rfft2(phase_unwrap)
-    )
-    return velo
-
-
 def helmholtz_decomp(field: np.ndarray, plot=False, dx: float = 1) -> tuple:
     """Decomposes a phase picture into compressible and incompressible velocities
 
@@ -336,87 +681,6 @@ def helmholtz_decomp(field: np.ndarray, plot=False, dx: float = 1) -> tuple:
     return velo, v_inc, v_comp
 
 
-def helmholtz_decomp_cp(
-    field: np.ndarray, plot: bool = False, dx: float = 1, regularize: bool = True
-) -> tuple:
-    """Decomposes a phase picture into compressible and incompressible velocities
-
-    Args:
-        field (np.ndarray): 2D array of the field
-        plot (bool, optional): Final plots. Defaults to True.
-        dx (float, optional): Spatial sampling size in m. Defaults to 1.
-        regularize (bool, optional): Whether to multiply speed by the amplitude or not.
-    Returns:
-        tuple: (velo, v_incc, v_comp) a tuple containing the velocity field,
-        the incompressible velocity and compressible velocity.
-    """
-    sy, sx = field.shape
-    # meshgrid in k space
-    kx = 2 * np.pi * cp.fft.rfftfreq(sx, d=dx)
-    ky = 2 * np.pi * cp.fft.fftfreq(sy, d=dx)
-    K = cp.array(cp.meshgrid(kx, ky))
-    if regularize:
-        velo = cp.abs(field) * velocity_cp(cp.angle(field))
-    else:
-        velo = velocity_cp(cp.angle(field))
-    v_tot = cp.hypot(velo[0], velo[1])
-    V_k = cp.fft.rfft2(velo)
-    # Helmohltz decomposition fot the compressible part
-    V_comp = -1j * cp.sum(V_k * K, axis=0) / ((cp.sum(K**2, axis=0)) + 1e-15)
-    v_comp = cp.fft.irfft2(1j * V_comp * K)
-    # Helmohltz decomposition fot the incompressible part
-    v_inc = velo - v_comp
-    if plot:
-        flow_inc = cp.hypot(v_inc[0], v_inc[1])
-        flow_comp = cp.hypot(v_comp[0], v_comp[1])
-        YY, XX = np.indices(flow_comp.shape)
-        fig, ax = plt.subplots(2, 2, figsize=[12, 9])
-        im0 = ax[0, 0].imshow(v_tot.get())
-        ax[0, 0].set_title(r"$|v^{tot}|$")
-        ax[0, 0].set_xlabel("x")
-        ax[0, 0].set_ylabel("y")
-        fig.colorbar(im0, ax=ax[0, 0])
-
-        im1 = ax[0, 1].imshow(flow_inc.get())
-        ax[0, 1].set_title(r"$|v^{inc}|$")
-        ax[0, 1].set_xlabel("x")
-        ax[0, 1].set_ylabel("y")
-        fig.colorbar(im1, ax=ax[0, 1])
-
-        im2 = ax[1, 0].imshow(flow_comp.get())
-        ax[1, 0].streamplot(
-            XX,
-            YY,
-            v_comp[0].get(),
-            v_comp[1].get(),
-            density=2.5,
-            color="white",
-            linewidth=1,
-        )
-        ax[1, 0].set_title(r"$|v^{comp}|$")
-        ax[1, 0].set_xlabel("x")
-        ax[1, 0].set_ylabel("y")
-        fig.colorbar(im2, ax=ax[1, 0])
-
-        # flows are calculated by streamplot
-        im3 = ax[1, 1].imshow(flow_inc.get(), cmap="viridis")
-        ax[1, 1].streamplot(
-            XX,
-            YY,
-            v_inc[0].get(),
-            v_inc[1].get(),
-            density=2.5,
-            color="white",
-            linewidth=1,
-        )
-        ax[1, 1].set_title(r"$v^{inc}$")
-        ax[1, 1].set_xlabel("x")
-        ax[1, 1].set_ylabel("y")
-        fig.colorbar(im3, ax=ax[1, 1], label=r"$|v^{inc}|$")
-        plt.show()
-    return velo, v_inc, v_comp
-
-
 def energy(ucomp: np.ndarray, uinc: np.ndarray) -> tuple:
     """Computes the total energy contained in the given compressible
     and incompressible velocities
@@ -467,54 +731,6 @@ def energy_spectrum(ucomp: np.ndarray, uinc: np.ndarray) -> np.ndarray:
     return Ucc, Uii
 
 
-def energy_cp(ucomp: cp.ndarray, uinc: cp.ndarray) -> tuple:
-    """Computes the total energy contained in the given compressible
-    and incompressible velocities
-
-    Args:
-        ucomp (np.ndarray): Compressible velocity field
-        uinc (np.ndarray): Incompressible velocity field
-
-    Returns:
-        (Ucc, Uii): The total compressible and incompressible energies
-    """
-    # compressible
-    Uc = cp.abs(cp.fft.rfft2(ucomp)) ** 2
-    Ucc = cp.sum(Uc)
-
-    # incompressible
-    Ui = cp.abs(cp.fft.rfft2(uinc)) ** 2
-    Uii = cp.sum(Ui)
-
-    return Ucc, Uii
-
-
-def energy_spectrum_cp(ucomp: cp.ndarray, uinc: cp.ndarray) -> cp.ndarray:
-    """Computes the compressible and incompressible energy spectra
-    using the Fourier transform of the velocity fields
-
-    Args:
-        ucomp (np.ndarray): Compressible velocity field
-        uinc (np.ndarray): Incompressible velocity field
-
-    Returns:
-        (Ucc, Uii) np.ndarray: The array containing the compressible / incompressible
-        energies as a function of the wavevector k
-    """
-    # compressible
-    Ux_c = cp.abs(cp.fft.fftshift(cp.fft.fft2(ucomp[0])))
-    Uy_c = cp.abs(cp.fft.fftshift(cp.fft.fft2(ucomp[1])))
-    Uc = Ux_c**2 + Uy_c**2
-    Ucc = az_avg_cp(Uc, center=(Uc.shape[1] // 2, Uc.shape[0] // 2))
-
-    # incompressible
-    Ux_i = cp.abs(cp.fft.fftshift(cp.fft.fft2(uinc[0])))
-    Uy_i = cp.abs(cp.fft.fftshift(cp.fft.fft2(uinc[1])))
-    Ui = Ux_i**2 + Uy_i**2
-    Uii = az_avg_cp(Ui, center=(Ui.shape[1] // 2, Ui.shape[0] // 2))
-    return Ucc, Uii
-
-
 def vortex_detection(phase: np.ndarray, plot: bool = False, r: int = 1) -> np.ndarray:
     """Detects the vortex positions using circulation calculation
 
@@ -562,91 +778,6 @@ def vortex_detection(phase: np.ndarray, plot: bool = False, r: int = 1) -> np.nd
     return vortices
 
 
-def vortex_detection_cp(
-    phase: cp.ndarray, plot: bool = False, r: int = 1
-) -> cp.ndarray:
-    """Detects the vortex positions using circulation calculation
-
-    Args:
-        phase (np.ndarray): Phase field.
-        plot (bool, optional): Whether to plot the result or not. Defaults to True.
-        r (int or list, optionnal): Radius of the plaquette. Defaults to 1.
-        If the radius is a list, will compute the winding for each radius and then
-        compare the results for each radius by taking the logical AND between the
-        vortices found at each radius.
-
-    Returns:
-        np.ndarray: A list of the vortices position and charge
-    """
-    velo = velocity_cp(phase)
-    if isinstance(r, int):
-        if r > 1:
-            windings = cp.zeros((r, phase.shape[-2], phase.shape[-1]), dtype=np.float32)
-        else:
-            windings = cp.zeros_like(velo[0], dtype=np.float32)
-    elif isinstance(r, list):
-        windings = cp.zeros(
-            (len(r), phase.shape[-2], phase.shape[-1]), dtype=np.float32
-        )
-    else:
-        windings = cp.zeros_like(velo[0], dtype=np.float32)
-    tpb = 32
-    bpgx = math.ceil(phase.shape[0] / tpb)
-    bpgy = math.ceil(phase.shape[1] / tpb)
-    if isinstance(r, int):
-        if r > 1:
-            for ir in range(r):
-                phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings[ir, :, :], ir + 1)
-            cond_plus = windings > 2 * np.pi
-            cond_plus = cond_plus.all(axis=0)
-            cond_minus = windings < -2 * np.pi
-            cond_minus = cond_minus.all(axis=0)
-        else:
-            phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings, r)
-            cond_plus = windings > 2 * np.pi
-            cond_minus = windings < -2 * np.pi
-
-    elif isinstance(r, list):
-        for ir, rr in enumerate(r):
-            phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings[ir, :, :], rr)
-        cond_plus = windings > 2 * np.pi
-        cond_plus = cond_plus.all(axis=0)
-        cond_minus = windings < -2 * np.pi
-        cond_minus = cond_minus.all(axis=0)
-
-    else:
-        phase_sum_cp[(bpgx, bpgy), (tpb, tpb)](velo, windings, r)
-        cond_plus = windings > 2 * np.pi
-        cond_minus = windings < -2 * np.pi
-    plus_y, plus_x = cp.where(cond_plus)
-    minus_y, minus_x = cp.where(cond_minus)
-    vortices = cp.zeros((len(plus_x) + len(minus_x), 3), dtype=np.float32)
-    vortices[0 : len(plus_x), 0] = plus_x
-    vortices[0 : len(plus_x), 1] = plus_y
-    vortices[0 : len(plus_x), 2] = 1
-    vortices[len(plus_x) :, 0] = minus_x
-    vortices[len(plus_x) :, 1] = minus_y
-    vortices[len(plus_x) :, 2] = -1
-    if plot:
-        if windings.ndim == 3:
-            windings = windings.mean(axis=0)
-        fig, ax = plt.subplots(1, 2, figsize=[8, 4])
-        im0 = ax[0].imshow(phase.get(), cmap="twilight_shifted")
-        im1 = ax[1].imshow(
-            windings.get(), cmap="seismic", norm=colors.CenteredNorm(vcenter=0)
-        )
-        ax[0].scatter(
-            vortices[:, 0].get(),
-            vortices[:, 1].get(),
-            c=vortices[:, 2].get(),
-            cmap="bwr",
-        )
-        fig.colorbar(im0, ax=ax[0], shrink=0.5, label="Vorticity")
-        fig.colorbar(im1, ax=ax[1], shrink=0.5, label="Winding")
-        plt.show()
-    return vortices
-
-
 @numba.njit(
     numba.bool_[:](numba.int64[:]), cache=True, fastmath=True, boundscheck=False
 )
@@ -934,101 +1065,7 @@ def ck(vortices: np.ndarray, k: int) -> float:
     return c
 
 
-@cuda.jit(cache=True, fastmath=True)
-def _distance_matrix(dist: cp.ndarray, x: cp.ndarray, y: cp.ndarray):
-    """Compute distance matrix using CUDA
-
-    Args:
-        x (cp.ndarray): Nd array of points
-        y (cp.ndarray): Nd array of points
-    """
-    i, j = numba.cuda.grid(2)
-    if i < x.shape[0] and j < y.shape[0]:
-        if j >= i:
-            dist[i, j] += math.sqrt((x[i, 0] - y[j, 0]) ** 2 + (x[i, 1] - y[j, 1]) ** 2)
-            dist[j, i] = dist[i, j]
-
-
-@cuda.jit(cache=True, fastmath=True)
-def _build_condition(condition: cp.ndarray, dist: cp.ndarray, bins: cp.ndarray):
-    """Constructs the array that represents the vortices pair i, j to consider
-    in the bin k.
-
-    Args:
-        condition (cp.ndarray): Boolean array of shape (k, i, j) where k is an index
-        running in the number of bins, i and j in the number of vortices.
-        dist (cp.ndarray): Distance matrix where D_ij is the distance between the
-        vortex i and j.
-        bins (cp.ndarray): The disk shells of radius r and width d within which we
-        compute the correlations between a vortex and all vortices lying in a bin.
-    """
-    i, j, k = numba.cuda.grid(3)
-    if i < condition.shape[0] and j < condition.shape[1] and k < len(bins):
-        condition[k - 1, i, j] = dist[i, j] > bins[k - 1]
-        condition[k - 1, i, j] &= dist[i, j] < bins[k]
-
-
-@cuda.jit(cache=True, fastmath=True)
-def _correlate(
-    corr: cp.ndarray, vortices: cp.ndarray, bins: cp.ndarray, condition: cp.ndarray
-):
-    """Compute the actual correlation function
-
-    Args:
-        corr (cp.ndarray): Output array
-        vortices (cp.ndarray): Vortices array where v_i = (x, y, l)
-        bins (cp.ndarray): Disk shells in which to consider vortices for the correlation
-        calculation
-        condition (cp.ndarray): Which vortices to consider
-    """
-    d = bins[1] - bins[0]
-    i, j, k = numba.cuda.grid(3)
-    if i < condition.shape[0] and j < condition.shape[1] and k < len(bins):
-        if condition[k - 1, i, j]:
-            r = abs(bins[k] - d / 2)
-            corr[k - 1] += (
-                1
-                / (2 * np.pi * r * d * vortices.shape[0])
-                * vortices[i, 2]
-                * vortices[j, 2]
-            )
-
-
-def pair_correlations_cp(vortices: cp.ndarray, bins: cp.ndarray) -> cp.ndarray:
-    """Computes the pair correlation function for a given vortex array.
-    See PHYSICAL REVIEW E 95, 052144 (2017) eq.12
-
-    Args:
-        vortices (np.ndarray): Vortices array
-        bins (np.ndarray): bins of distance in which to compute the
-        correlation function
-
-    Returns:
-        np.ndarray: The correlation function of length len(bins)
-    """
-    corr = cp.zeros(len(bins) - 1)
-    # compute distance matrix of vortices
-    dist_matrix = cp.zeros((vortices.shape[0], vortices.shape[0]), dtype=np.float32)
-    tpb = 32
-    bpgx = math.ceil(dist_matrix.shape[0] / tpb)
-    bpgy = math.ceil(dist_matrix.shape[1] / tpb)
-    _distance_matrix[(bpgx, bpgy), (tpb, tpb)](
-        dist_matrix, vortices[:, 0:2], vortices[:, 0:2]
-    )
-    condition = cp.zeros(
-        (len(bins), dist_matrix.shape[0], dist_matrix.shape[1]), dtype=np.bool8
-    )
-    tpb = 16
-    tpbz = 4
-    bpgx = math.ceil(dist_matrix.shape[0] / tpb)
-    bpgy = math.ceil(dist_matrix.shape[1] / tpb)
-    bpgz = math.ceil(len(bins / tpb))
-    _build_condition[(bpgx, bpgy, bpgz), (tpb, tpb, tpbz)](condition, dist_matrix, bins)
-    _correlate[(bpgx, bpgy, bpgz), (tpb, tpb, tpbz)](corr, vortices, bins, condition)
-    return corr
-
-
-def drag_force(psi: np.ndarray, U: np.ndarray) -> tuple:
+def drag_force(psi: np.ndarray, U: np.ndarray) -> tuple[float, float]:
     """Computes the drag force considering an obstacle map U(r)
     and an intensity map I(r)
 
@@ -1051,29 +1088,3 @@ def drag_force(psi: np.ndarray, U: np.ndarray) -> tuple:
         f[:, 1] = fy
         return f
     return (fx, fy)
-
-
-def drag_force_cp(psi: cp.ndarray, U: cp.ndarray) -> tuple:
-    """Computes the drag force considering an obstacle map U(r)
-    and an intensity map I(r)
-
-    Args:
-        psi (cp.ndarray): Intensity map
-        U (cp.ndarray): Potential map
-
-    Returns:
-        fx, fy (float): The drag force in a.u
-    """
-    if U.dtype == np.complex64:
-        U = cp.real(U)
-    gradx = cp.gradient(U, axis=-1)
-    grady = cp.gradient(U, axis=-2)
-    fx = cp.sum(-gradx * psi, axis=(-2, -1))
-    fy = cp.sum(-grady * psi, axis=(-2, -1))
-    if psi.ndim == 3:
-        f = np.zeros((psi.shape[0], 2))
-        f[:, 0] = fx.get()
-        f[:, 1] = fy.get()
-        return f
-    else:
-        return np.array([fx.get(), fy.get()])
diff --git a/setup.py b/setup.py
index 0e67393..db5fb98 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,6 @@
         "pyfftw",
         "numba",
         "scikit-image",
-        "cupy",
         "networkx",
         "numbalsoda",
         "screeninfo",