diff --git a/apps/SECPriority/CMakeLists.txt b/apps/SECPriority/CMakeLists.txt
index c3e7d826..7b66fd4c 100644
--- a/apps/SECPriority/CMakeLists.txt
+++ b/apps/SECPriority/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SOURCE_LIST
     secp.cu
     secp_rxmesh.cuh
     secp_kernels.cuh
+    secp_pair.h
 )
 
 target_sources(SECPriority
diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh
index 35aca5bd..65f90229 100644
--- a/apps/SECPriority/secp_kernels.cuh
+++ b/apps/SECPriority/secp_kernels.cuh
@@ -2,14 +2,12 @@
 #include "../Remesh/link_condition.cuh"
 #include "rxmesh/cavity_manager.cuh"
 
-#include
-#include
+#include "secp_pair.h"
 
 template <typename T, uint32_t blockThreads>
 __global__ static void secp(rxmesh::Context             context,
                             rxmesh::VertexAttribute<T>  coords,
-                            const int                   reduce_threshold,
-                            rxmesh::EdgeAttribute<bool> e_pop_attr)
+                            rxmesh::EdgeAttribute<bool> to_collapse)
 {
     using namespace rxmesh;
     auto block = cooperative_groups::this_thread_block();
@@ -40,12 +38,12 @@ __global__ static void secp(rxmesh::Context context,
     ev_query.prologue(block, shrd_alloc);
     block.sync();
 
-    // 1a) mark edge we want to collapse given e_pop_attr
+    // 1a) mark edge we want to collapse given to_collapse
     for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) {
         assert(eh.local_id() < cavity.patch_info().num_edges[0]);
-        // edge_mask.set(eh.local_id(), e_pop_attr(eh));
-        if (true == e_pop_attr(eh)) {
+        // edge_mask.set(eh.local_id(), to_collapse(eh));
+        if (to_collapse(eh)) {
             edge_mask.set(eh.local_id(), true);
         }
     });
@@ -73,7 +71,7 @@ __global__ static void secp(rxmesh::Context context,
     ev_query.epilogue(block, shrd_alloc);
 
     // create the cavity
-    if (cavity.prologue(block, shrd_alloc, coords)) {
+    if (cavity.prologue(block, shrd_alloc, coords, to_collapse)) {
 
         edge_mask.reset(block);
         block.sync();
@@ -145,7 +143,7 @@ template
 __global__ static void compute_edge_priorities(
     rxmesh::Context                  context,
     const rxmesh::VertexAttribute<T> coords,
-    PQView_t                         pq_view,
+    PQViewT                          pq_view,
     size_t                           pq_num_bytes)
 {
     using namespace rxmesh;
@@ -154,8 +152,9 @@ __global__ static void compute_edge_priorities(
     ShmemAllocator shrd_alloc;
     Query<blockThreads> query(context);
 
-    auto intermediatePairs =
-        shrd_alloc.alloc<PriorityPair_t>(query.get_patch_info().num_edges[0]);
+
+    PriorityPairT* s_pairs =
+        shrd_alloc.alloc<PriorityPairT>(query.get_patch_info().num_edges[0]);
 
     __shared__ int pair_counter;
     pair_counter = 0;
@@ -163,27 +162,24 @@ __global__ static void compute_edge_priorities(
         const VertexHandle v0 = iter[0];
         const VertexHandle v1 = iter[1];
 
-        const Vec3<T> p0(coords(v0, 0), coords(v0, 1), coords(v0, 2));
-        const Vec3<T> p1(coords(v1, 0), coords(v1, 1), coords(v1, 2));
+        const vec3<T> p0 = coords.to_glm<3>(v0);
+        const vec3<T> p1 = coords.to_glm<3>(v1);
 
-        T len2 = glm::distance2(p0, p1);
+        const T len2 = glm::distance2(p0, p1);
 
-        auto p_e = rxmesh::detail::unpack(eh.unique_id());
-        // printf("p_id:%u\te_id:%hu\n", p_e.first, p_e.second);
-        // printf("e_id:%llu\t, len:%f\n", eh.unique_id(), len2);
+        assert(eh.patch_id() < (1 << 16));
 
         // repack the EdgeHandle into smaller 32 bits for
         // use with priority queue. Need to check elsewhere
        // that there are less than 2^16 patches.
-        auto id32 = unique_id32(p_e.second, (uint16_t)p_e.first);
-        // auto p_e_32 = unpack32(id32);
-        // printf("32bit p_id:%hu\te_id:%hu\n", p_e_32.first, p_e_32.second);
+        const uint32_t id32 =
+            unique_id32(eh.local_id(), (uint16_t)eh.patch_id());
+
+        const PriorityPairT p{len2, id32};
 
-        PriorityPair_t p{len2, id32};
-        // PriorityPair_t p{len2, eh};
+        int val_counter = atomicAdd(&pair_counter, 1);
 
-        auto val_counter = atomicAdd(&pair_counter, 1);
-        intermediatePairs[val_counter] = p;
+        s_pairs[val_counter] = p;
     };
 
     auto block = cooperative_groups::this_thread_block();
@@ -191,16 +187,13 @@ __global__ static void compute_edge_priorities(
     block.sync();
 
     char* pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes);
-    pq_view.push(block,
-                 intermediatePairs,
-                 intermediatePairs + pair_counter,
-                 pq_shrd_mem);
+    pq_view.push(block, s_pairs, s_pairs + pair_counter, pq_shrd_mem);
 }
 
 template <uint32_t blockThreads>
 __global__ static void pop_and_mark_edges_to_collapse(
-    PQView_t                    pq_view,
-    rxmesh::EdgeAttribute<bool> marked_edges,
+    PQViewT                     pq_view,
+    rxmesh::EdgeAttribute<bool> to_collapse,
     uint32_t                    pop_num_edges)
 {
     // setup shared memory array to store the popped pairs
@@ -210,24 +203,24 @@ __global__ static void pop_and_mark_edges_to_collapse(
     using namespace rxmesh;
     ShmemAllocator shrd_alloc;
 
-    auto  intermediatePairs = shrd_alloc.alloc<PriorityPair_t>(blockThreads);
-    char* pq_shrd_mem = shrd_alloc.alloc(pq_view.get_shmem_size(blockThreads));
+    PriorityPairT* s_pairs = shrd_alloc.alloc<PriorityPairT>(blockThreads);
+
+    char* pq_shrd_mem = shrd_alloc.alloc(pq_view.get_shmem_size(blockThreads));
+
     cg::thread_block g = cg::this_thread_block();
-    pq_view.pop(
-        g, intermediatePairs, intermediatePairs + blockThreads, pq_shrd_mem);
-    int tid       = blockIdx.x * blockDim.x + threadIdx.x;
-    int local_tid = threadIdx.x;
+    pq_view.pop(g, s_pairs, s_pairs + blockThreads, pq_shrd_mem);
+
+    int tid = blockIdx.x * blockDim.x + threadIdx.x;
 
     // Make sure the index is within bounds
    if (tid < pop_num_edges) {
-        // printf("tid: %d\n", tid);
 
         // unpack the uid to get the patch and edge ids
-        auto p_e = unpack32(intermediatePairs[local_tid].second);
-        // printf("32bit p_id:%hu\te_id:%hu\n", p_e.first, p_e.second);
-        rxmesh::EdgeHandle eh(p_e.first, rxmesh::LocalEdgeT(p_e.second));
+        auto [patch_id, local_id] = unpack32(s_pairs[threadIdx.x].second);
+
+        EdgeHandle eh(patch_id, LocalEdgeT(local_id));
 
         // use the eh to index into a passed in edge attribute
-        marked_edges(eh) = true;
+        to_collapse(eh) = true;
     }
 }
diff --git a/apps/SECPriority/secp_pair.h b/apps/SECPriority/secp_pair.h
new file mode 100644
index 00000000..0571e18c
--- /dev/null
+++ b/apps/SECPriority/secp_pair.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include
+#include
+
+
+/**
+ * @brief Return unique index of the local mesh element composed by the
+ * patch id and the local index
+ *
+ * @param local_id the local within-patch mesh element id
+ * @param patch_id the patch owning the mesh element
+ * @return
+ */
+constexpr __device__ __host__ __forceinline__ uint32_t
+unique_id32(const uint16_t local_id, const uint16_t patch_id)
+{
+    uint32_t ret = patch_id;
+    ret          = (ret << 16);
+    ret |= local_id;
+    return ret;
+}
+
+
+/**
+ * @brief unpack a 32 uint to its high and low 16 bits.
+ * This is used to convert the unique id to its local id (16
+ * low bit) and patch id (high 16 bit)
+ * @param uid unique id
+ * @return a std::pair storing the patch id and local id
+ */
+constexpr __device__ __host__ __forceinline__ std::pair<uint16_t, uint16_t>
+    unpack32(uint32_t uid)
+{
+    uint16_t local_id = uid & ((1 << 16) - 1);
+    uint16_t patch_id = uid >> 16;
+    return std::make_pair(patch_id, local_id);
+}
+
+
+/**
+ * @brief less than operator for std::pair
+ * @tparam T
+ */
+template <typename T>
+struct pair_less
+{
+    __host__ __device__ __forceinline__ bool operator()(const T& a,
+                                                        const T& b) const
+    {
+        return a.first < b.first;
+    }
+};
+
+
+// Priority queue setup. Use 'pair_less' to prioritize smaller values.
+using PriorityPairT   = cuco::pair<float, uint32_t>;
+using PriorityCompare = pair_less<PriorityPairT>;
+using PriorityQueueT  = cuco::priority_queue<PriorityPairT, PriorityCompare>;
+using PQViewT         = PriorityQueueT::device_mutable_view;
diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh
index bddb4b4f..94bc5471 100644
--- a/apps/SECPriority/secp_rxmesh.cuh
+++ b/apps/SECPriority/secp_rxmesh.cuh
@@ -1,98 +1,10 @@
 #pragma once
-
-#define GLM_ENABLE_EXPERIMENTAL
-#include
-#include
-
-
 #include "rxmesh/query.cuh"
 #include "rxmesh/rxmesh_dynamic.h"
-
-// Priority Queue related includes
-#include
-#include
-
-#include
-#include
-
-/**
- * @brief Return unique index of the local mesh element composed by the
- * patch id and the local index
- *
- * @param local_id the local within-patch mesh element id
- * @param patch_id the patch owning the mesh element
- * @return
- */
-constexpr __device__ __host__ __forceinline__ uint32_t
-unique_id32(const uint16_t local_id, const uint16_t patch_id)
-{
-    uint32_t ret = patch_id;
-    ret = (ret << 16);
-    ret |= local_id;
-    return ret;
-}
-
-/**
- * @brief unpack a 32 uint to its high and low 16 bits.
- * This is used to convert the unique id to its local id (16
- * low bit) and patch id (high 16 bit)
- * @param uid unique id
- * @return a std::pair storing the patch id and local id
- */
-constexpr __device__ __host__ __forceinline__ std::pair<uint16_t, uint16_t>
-    unpack32(uint32_t uid)
-{
-    uint16_t local_id = uid & ((1 << 16) - 1);
-    uint16_t patch_id = uid >> 16;
-    return std::make_pair(patch_id, local_id);
-}
-
-// Priority queue setup. Use 'pair_less' to prioritize smaller values.
-template <typename T>
-struct pair_less
-{
-    __host__ __device__ bool operator()(const T& a, const T& b) const
-    {
-        return a.first < b.first;
-    }
-};
-
-using PriorityPair_t  = cuco::pair<float, uint32_t>;
-using PriorityCompare = pair_less<PriorityPair_t>;
-using PriorityQueue_t = cuco::priority_queue<PriorityPair_t, PriorityCompare>;
-using PQView_t        = PriorityQueue_t::device_mutable_view;
-
-
-template <typename T>
-using Vec3 = glm::vec<3, T, glm::defaultp>;
-
-#include "secp_kernels.cuh"
-
 #include "rxmesh/util/report.h"
 
-template <typename T>
-void render_edge_attr(
-    rxmesh::RXMeshDynamic&                           rx,
-    const std::shared_ptr<rxmesh::EdgeAttribute<T>>& edge_attr)
-{
-    using namespace rxmesh;
-    // make sure the attribute is on the HOST
-    edge_attr->move(DEVICE, HOST);
-
-    std::vector<float> edgeColors(rx.get_num_edges());
-    rx.for_each_edge(HOST, [&](EdgeHandle eh) {
-        if (true == (*edge_attr)(eh)) {
-            edgeColors[rx.linear_id(eh)] = 200.0f;
-        } else {
-            edgeColors[rx.linear_id(eh)] = eh.patch_id();
-        }
-    });
-
-    auto ps_mesh = rx.get_polyscope_mesh();
-    auto edge_colors =
-        ps_mesh->addEdgeScalarQuantity("Edges to Collapse", edgeColors);
-    edge_colors->setEnabled(true);
-}
+#include "secp_kernels.cuh"
+#include "secp_pair.h"
 
 inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
                         const uint32_t         final_num_vertices,
@@ -103,7 +15,7 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
     using namespace rxmesh;
     constexpr uint32_t blockThreads = 256;
 
-    rxmesh::Report report("SECP_RXMesh");
+    Report report("SECP_RXMesh");
     report.command_line(Arg.argc, Arg.argv);
     report.device();
     report.system();
@@ -115,15 +27,17 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
     LaunchBox<blockThreads> launch_box;
 
-    float total_time      = 0;
-    float app_time        = 0;
-    float slice_time      = 0;
-    float cleanup_time    = 0;
-    float pq_time         = 0;
-    float pop_mark_time   = 0;
-    float e_priority_time = 0;
+    Timers<GPUTimer> timers;
+    timers.add("Total");
 
-    auto e_pop_attr = rx.add_edge_attribute<bool>("ePop", 1);
+    timers.add("App");
+    timers.add("Slice");
+    timers.add("Cleanup");
+    timers.add("PriorityQueue");
+    timers.add("PriorityQueuePop");
+    timers.add("EdgePriority");
+
+    auto to_collapse = rx.add_edge_attribute<bool>("ePop", 1);
 
     RXMESH_INFO("#Vertices {}", rx.get_num_vertices());
     RXMESH_INFO("#Edges {}", rx.get_num_edges());
@@ -147,20 +61,18 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
     int num_passes = 0;
 
     CUDA_ERROR(cudaProfilerStart());
-    GPUTimer timer;
-    timer.start();
+
+    timers.start("Total");
     while (rx.get_num_vertices(true) > final_num_vertices) {
         ++num_passes;
 
-        GPUTimer pq_timer;
-        pq_timer.start();
+        timers.start("PriorityQueue");
 
-        // rebuild every round? Not necessarily a great way to use a pq.
-        PriorityQueue_t pq(rx.get_num_edges());
-        e_pop_attr->reset(false, DEVICE);
-
-        // rx.prepare_launch_box(
-        rx.update_launch_box(
+        // rebuild every round? Not necessarily a great way to use a priority
+        // queue.
+        PriorityQueueT priority_queue(rx.get_num_edges());
+        to_collapse->reset(false, DEVICE);
+        rx.prepare_launch_box(
            {Op::EV},
            launch_box,
            (void*)compute_edge_priorities<float, blockThreads>,
@@ -171,70 +83,58 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
            [&](uint32_t v, uint32_t e, uint32_t f) {
                // Allocate enough additional memory
                // for the priority queue and the intermediate
-               // array of PriorityPair_t.
-               return pq.get_shmem_size(blockThreads) +
-                      (e * sizeof(PriorityPair_t));
+               // array of PriorityPairT.
+               return priority_queue.get_shmem_size(blockThreads) +
+                      (e * sizeof(PriorityPairT));
            });
 
-        GPUTimer edge_priorities_timer;
-        edge_priorities_timer.start();
+        timers.start("EdgePriority");
         compute_edge_priorities<float, blockThreads>
-            <<<launch_box.blocks,
-               launch_box.num_threads,
-               launch_box.smem_bytes_dyn>>>(rx.get_context(),
-                                            *coords,
-                                            pq.get_mutable_device_view(),
-                                            pq.get_shmem_size(blockThreads));
-        edge_priorities_timer.stop();
-        e_priority_time += edge_priorities_timer.elapsed_millis();
-        // cudaDeviceSynchronize();
-        // RXMESH_TRACE("launch_box.smem_bytes_dyn = {}",
-        //     launch_box.smem_bytes_dyn); RXMESH_TRACE("pq.get_shmem_size = {}",
-        //     pq.get_shmem_size(blockThreads));
+            <<<launch_box.blocks,
+               launch_box.num_threads,
+               launch_box.smem_bytes_dyn>>>(
+                rx.get_context(),
+                *coords,
+                priority_queue.get_mutable_device_view(),
+                priority_queue.get_shmem_size(blockThreads));
+        timers.stop("EdgePriority");
 
         // Next kernel needs to pop some percentage of the top
         // elements in the priority queue and store popped elements
         // to be used by the next kernel that actually does the collapses
-
-        float reduce_ratio = edge_reduce_ratio;
-        const int num_edges_before = int(rx.get_num_edges());
+        const int num_edges_before = int(rx.get_num_edges(true));
         const int reduce_threshold =
-            std::max(1, int(reduce_ratio * float(num_edges_before)));
+            std::max(1, int(edge_reduce_ratio * float(num_edges_before)));
 
         // Mark the edge attributes to be collapsed
-        uint32_t pop_num_edges =
-            reduce_threshold;  // reduce_ratio * rx.get_num_edges();
-        // RXMESH_TRACE("pop_num_edges: {}", pop_num_edges);
+        uint32_t pop_num_edges = reduce_threshold;
 
         constexpr uint32_t threads_per_block = 256;
         uint32_t number_of_blocks =
            (pop_num_edges + threads_per_block - 1) / threads_per_block;
-        int shared_mem_bytes = pq.get_shmem_size(threads_per_block) +
-                               (threads_per_block * sizeof(PriorityPair_t));
-        // RXMESH_TRACE("threads_per_block: {}", threads_per_block);
-        // RXMESH_TRACE("number_of_blocks: {}", number_of_blocks);
-        // RXMESH_TRACE("shared_mem_bytes: {}", shared_mem_bytes);
-
-        GPUTimer pop_mark_timer;
-        pop_mark_timer.start();
+        int shared_mem_bytes =
+            priority_queue.get_shmem_size(threads_per_block) +
+            (threads_per_block * sizeof(PriorityPairT));
+
+        timers.start("PriorityQueuePop");
         pop_and_mark_edges_to_collapse<threads_per_block>
            <<<number_of_blocks, threads_per_block, shared_mem_bytes>>>(
-                pq.get_mutable_device_view(), *e_pop_attr, pop_num_edges);
+                priority_queue.get_mutable_device_view(),
+                *to_collapse,
+                pop_num_edges);
 
-        // if(num_passes == 1)
-        // {
-        //     render_edge_attr(rx, e_pop_attr);
-        // }
-        CUDA_ERROR(cudaDeviceSynchronize());
-        CUDA_ERROR(cudaGetLastError());
-        pop_mark_timer.stop();
-        pop_mark_time += pop_mark_timer.elapsed_millis();
+        timers.stop("PriorityQueuePop");
 
-        pq_timer.stop();
+        timers.stop("PriorityQueue");
 
-        pq_time += pq_timer.elapsed_millis();
+        {
+            to_collapse->move(DEVICE, HOST);
+            rx.get_polyscope_mesh()->addEdgeScalarQuantity("ToCollapse",
+                                                           *to_collapse);
+            polyscope::show();
+        }
 
-        // loop over the mesh, and try to collapse
+        // loop over the mesh, and try to collapse
         rx.reset_scheduler();
         while (!rx.is_queue_empty() &&
                rx.get_num_vertices(true) > final_num_vertices) {
@@ -257,67 +157,58 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
                        3 * ShmemAllocator::default_alignment;
                 });
 
-            max_smem_bytes_dyn =
-                std::max(max_smem_bytes_dyn, launch_box.smem_bytes_dyn);
-            max_smem_bytes_static =
-                std::max(max_smem_bytes_static, launch_box.smem_bytes_static);
-            max_num_registers_per_thread =
-                std::max(max_num_registers_per_thread,
-                         launch_box.num_registers_per_thread);
-            max_num_blocks =
-                std::max(max_num_blocks, DIVIDE_UP(launch_box.blocks, 8));
-            GPUTimer app_timer;
-
-            app_timer.start();
+            timers.start("App");
             secp<float, blockThreads>
                 <<<launch_box.blocks,
                    launch_box.num_threads,
                    launch_box.smem_bytes_dyn>>>(
-                    rx.get_context(), *coords, reduce_threshold, *e_pop_attr);
-            // should we cudaDeviceSyn here? stopping timers too soon?
-            // CUDA_ERROR(cudaDeviceSynchronize());
-            // CUDA_ERROR(cudaGetLastError());
+                    rx.get_context(), *coords, *to_collapse);
+            timers.stop("App");
 
-            app_timer.stop();
-
-            GPUTimer cleanup_timer;
-            cleanup_timer.start();
+            timers.start("Cleanup");
             rx.cleanup();
-            cleanup_timer.stop();
+            timers.stop("Cleanup");
 
-            GPUTimer slice_timer;
-            slice_timer.start();
+            timers.start("Slice");
             rx.slice_patches(*coords);
-            slice_timer.stop();
+            timers.stop("Slice");
 
-            GPUTimer cleanup_timer2;
-            cleanup_timer2.start();
+            timers.start("Cleanup");
             rx.cleanup();
-            cleanup_timer2.stop();
+            timers.stop("Cleanup");
+        }
+        {
+            rx.update_polyscope();
 
-            CUDA_ERROR(cudaDeviceSynchronize());
-            CUDA_ERROR(cudaGetLastError());
+            auto ps_mesh = rx.get_polyscope_mesh();
+            ps_mesh->updateVertexPositions(*coords);
+            ps_mesh->setEnabled(false);
 
-            app_time += app_timer.elapsed_millis();
-            slice_time += slice_timer.elapsed_millis();
-            cleanup_time += cleanup_timer.elapsed_millis();
-            cleanup_time += cleanup_timer2.elapsed_millis();
+            rx.render_vertex_patch();
+            rx.render_edge_patch();
+            rx.render_face_patch();
+            polyscope::show();
         }
     }
 
-    timer.stop();
-    total_time += timer.elapsed_millis();
+    timers.stop("Total");
+
+    CUDA_ERROR(cudaProfilerStop());
 
     RXMESH_INFO("secp_rxmesh() RXMesh SEC took {} (ms), num_passes= {}",
-                total_time,
+                timers.elapsed_millis("Total"),
                 num_passes);
-    RXMESH_INFO("secp_rxmesh() PriorityQ time {} (ms)", pq_time);
+    RXMESH_INFO("secp_rxmesh() PriorityQ time {} (ms)",
+                timers.elapsed_millis("PriorityQueue"));
     RXMESH_INFO("secp_rxmesh() |-Edge priorities time {} (ms)",
-                e_priority_time);
-    RXMESH_INFO("secp_rxmesh() |-Pop and Mark time {} (ms)", pop_mark_time);
-    RXMESH_INFO("secp_rxmesh() App time {} (ms)", app_time);
-    RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)", slice_time);
-    RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)", cleanup_time);
+                timers.elapsed_millis("EdgePriority"));
+    RXMESH_INFO("secp_rxmesh() |-Pop and Mark time {} (ms)",
+                timers.elapsed_millis("PriorityQueuePop"));
+    RXMESH_INFO("secp_rxmesh() App time {} (ms)", timers.elapsed_millis("App"));
+    RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)",
+                timers.elapsed_millis("Slice"));
+    RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)",
+                timers.elapsed_millis("Cleanup"));
 
     RXMESH_INFO("#Vertices {}", rx.get_num_vertices(true));
     RXMESH_INFO("#Edges {}", rx.get_num_edges(true));
 
@@ -326,8 +217,8 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
 
     rx.update_host();
 
-    coords->move(DEVICE, HOST);
+    EXPECT_TRUE(rx.validate());
 
     report.add_member("num_passes", num_passes);
     report.add_member("max_smem_bytes_dyn", max_smem_bytes_dyn);
@@ -335,11 +226,12 @@ inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx,
     report.add_member("max_num_registers_per_thread",
                       max_num_registers_per_thread);
     report.add_member("max_num_blocks", max_num_blocks);
-    report.add_member("secp_remesh_time", total_time);
-    report.add_member("priority_queue_time", pq_time);
-    report.add_member("app_time", app_time);
-    report.add_member("slice_time", slice_time);
-    report.add_member("cleanup_time", cleanup_time);
+    report.add_member("secp_remesh_time", timers.elapsed_millis("Total"));
+    report.add_member("priority_queue_time",
+                      timers.elapsed_millis("PriorityQueue"));
+    report.add_member("app_time", timers.elapsed_millis("App"));
+    report.add_member("slice_time", timers.elapsed_millis("Slice"));
+    report.add_member("cleanup_time", timers.elapsed_millis("Cleanup"));
 
     report.add_member("attributes_memory_mg", coords->get_memory_mg());
 
     report.model_data(Arg.obj_file_name + "_after", rx, "model_after");
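
Reviewer note: the sketch below is not part of the patch. It is a minimal, hypothetical round-trip check of the helpers introduced in secp_pair.h, assuming nvcc (C++17) with cuCollections and secp_pair.h on the include path; the file name and variable names are illustrative only.

// pack_unpack_sketch.cu -- hypothetical check of the secp_pair.h helpers
#include <cassert>
#include <cstdint>

#include "secp_pair.h"

int main()
{
    // Pack a (local edge id, patch id) pair the same way compute_edge_priorities
    // does: the patch id lands in the high 16 bits, the local id in the low 16,
    // which is why the kernel asserts patch_id < 2^16.
    const uint16_t patch_id = 3;
    const uint16_t local_id = 1337;
    const uint32_t key      = unique_id32(local_id, patch_id);

    // unpack32 returns {patch id, local id}, which is exactly what
    // pop_and_mark_edges_to_collapse uses to rebuild the EdgeHandle.
    auto [p, l] = unpack32(key);
    assert(p == patch_id && l == local_id);

    // pair_less compares only .first (the squared edge length), so the
    // priority queue built on PriorityCompare pops shorter edges first.
    const PriorityPairT shorter{0.25f, key};
    const PriorityPairT longer{4.00f, key};
    assert(PriorityCompare{}(shorter, longer));

    return 0;
}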