diff --git a/lib/pls/include/pls/internal/base/backoff.h b/lib/pls/include/pls/internal/base/backoff.h
index c4d3827..9e7cbf5 100644
--- a/lib/pls/include/pls/internal/base/backoff.h
+++ b/lib/pls/include/pls/internal/base/backoff.h
@@ -14,8 +14,10 @@ namespace internal {
 namespace base {
 
 class backoff {
-  const unsigned long INITIAL_SPIN_ITERS = 2u << 2u;
-  const unsigned long MAX_SPIN_ITERS = 2u << 6u;
+  const unsigned long INITIAL_SPIN_ITERS = 2u << 4u;  // 32: starting value of current_
+  const unsigned long MAX_SPIN_ITERS = 2u << 8u;  // 512: cap on the bound of the random spin count
+  const unsigned long MAX_ITERS = 2u << 10u;  // 2048: value at which current_ saturates
+  const unsigned long YIELD_ITERS = 2u << 10u;  // 2048: equals MAX_ITERS, so yielding starts once current_ saturates
 
   unsigned long current_ = INITIAL_SPIN_ITERS;
   std::minstd_rand random_;
@@ -32,10 +34,12 @@ class backoff {
     PROFILE_LOCK("Backoff")
     spin(random_() % std::min(current_, MAX_SPIN_ITERS));
 
-    current_ = current_ * 2;
-    if (current_ > MAX_SPIN_ITERS) {
-      current_ = MAX_SPIN_ITERS;
+    if (current_ >= YIELD_ITERS) {
+      PROFILE_LOCK("Yield")
+      this_thread::yield();
     }
+
+    current_ = std::min(current_ * 2, MAX_ITERS);  // double per call, clamped to MAX_ITERS
   }
 
   void reset() {
diff --git a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h b/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h
index 29fa12a..23f734d 100644
--- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h
+++ b/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h
@@ -12,8 +12,6 @@
 
 #include "aligned_stack.h"
 
-//#define LOCK_FREE_DEBUG_PRINT
-
 namespace pls {
 namespace internal {
 namespace data_structures {
@@ -87,10 +85,10 @@ class work_stealing_deque {
   using state = aligned_stack::state;
 
   explicit work_stealing_deque(aligned_stack *stack) : stack_{stack},
+                                                       base_pointer_{0},
                                                        head_{0},
                                                        tail_{0},
-                                                       previous_tail_{0},
-                                                       base_pointer_{0} {
+                                                       previous_tail_{0} {
     reset_base_pointer();
   }
   work_stealing_deque(const work_stealing_deque &other) : stack_{other.stack_},
@@ -128,9 +126,6 @@ class work_stealing_deque {
   template<typename T>
   Item *push_tail(const T &new_item) {
     cas_integer local_tail = tail_;
-    cas_integer local_head = head_;
-
-    PLS_ASSERT((local_tail >= get_offset(local_head)), "Tail MUST be in front of head!")
 
     auto new_pair = allocate_item(new_item);
     // Prepare current tail to point to correct next items
@@ -143,13 +138,6 @@ class work_stealing_deque {
     // Linearization point, item appears after this write
     cas_integer new_tail = current_stack_offset();
     tail_ = new_tail;
-#ifdef LOCK_FREE_DEBUG_PRINT
-    {
-      std::lock_guard<base::spin_lock> lock{lock_};
-      std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                << "Pushed Tail " << local_tail << "->" << new_tail << std::endl;
-    }
-#endif
 
     return &(new_pair->second);
   }
@@ -172,13 +160,6 @@ class work_stealing_deque {
     local_head = head_; // Linearization point, outside knows list is empty
 
     if (get_offset(local_head) < new_tail) {
-#ifdef LOCK_FREE_DEBUG_PRINT
-      {
-        std::lock_guard<base::spin_lock> lock{lock_};
-        std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                  << "Poped Tail (distance) " << local_tail << "->" << new_tail << std::endl;
-      }
-#endif
       return previous_tail_item->data<Item>(); // Success, enough distance to other threads
     }
 
@@ -186,26 +167,10 @@ class work_stealing_deque {
       cas_integer new_head = set_stamp(new_tail, get_stamp(local_head) + 1);
       // Try competing with consumers by updating the head's stamp value
       if (head_.compare_exchange_strong(local_head, new_head)) {
-#ifdef LOCK_FREE_DEBUG_PRINT
-        {
-          std::lock_guard<base::spin_lock> lock{lock_};
-          std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                    << "Poped Tail (won competition 1) " << local_tail << "->" << new_tail << std::endl;
-        }
-#endif
         return previous_tail_item->data<Item>(); // SUCCESS, we won the competition with other threads
       }
     }
 
-#ifdef LOCK_FREE_DEBUG_PRINT
-    {
-      std::lock_guard<base::spin_lock> lock{lock_};
-      std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                << "FAILED to pop tail (lost competition) " << get_offset(local_head) << "; " << local_tail << "->"
-                << new_tail << std::endl;
-    }
-#endif
-
     // Some other thread either won the competition or it already set the head further than we are
     // before we even tried to compete with it.
     // Reset the queue into an empty state => head_ = tail_
@@ -236,22 +201,9 @@ class work_stealing_deque {
     // 3) owning thread removed tail, we lose to this
     cas_integer new_head = set_stamp(next_item_offset, get_stamp(local_head) + 1);
     if (head_.compare_exchange_strong(local_head, new_head)) {
-#ifdef LOCK_FREE_DEBUG_PRINT
-      {
-        std::lock_guard<base::spin_lock> lock{lock_};
-        std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                  << "Popped Head " << get_offset(local_head) << "->" << next_item_offset << std::endl;
-      }
-#endif
       return head_data_item; // SUCCESS, we won the competition
     }
-#ifdef LOCK_FREE_DEBUG_PRINT
-    {
-      std::lock_guard<base::spin_lock> lock{lock_};
-      std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                << "Failed to pop head " << get_offset(local_head) << "->" << next_item_offset << std::endl;
-    }
-#endif
+
     return nullptr; // EMPTY, we lost the competition
 
   }
@@ -270,14 +222,6 @@ class work_stealing_deque {
         head_ = set_stamp(item_offset, get_stamp(local_head) + 1);
       }
     }
-
-#ifdef LOCK_FREE_DEBUG_PRINT
-    {
-      std::lock_guard<base::spin_lock> lock{lock_};
-      std::cout << base::this_thread::state<scheduling::thread_state>()->id_ << " - "
-                << "Release Memory " << item_offset << std::endl;
-    }
-#endif
   }
 
   void release_memory_until(Item *item) {
diff --git a/lib/pls/src/internal/scheduling/abstract_task.cpp b/lib/pls/src/internal/scheduling/abstract_task.cpp
index 0e2166f..aeccb1d 100644
--- a/lib/pls/src/internal/scheduling/abstract_task.cpp
+++ b/lib/pls/src/internal/scheduling/abstract_task.cpp
@@ -1,3 +1,4 @@
+#include "pls/internal/base/backoff.h"
 #include "pls/internal/helpers/profiler.h"
 
 #include "pls/internal/scheduling/thread_state.h"
@@ -9,6 +10,8 @@ namespace internal {
 namespace scheduling {
 
 bool abstract_task::steal_work() {
+  thread_local static base::backoff backoff{};
+
   PROFILE_STEALING("abstract_task::steal_work")
   const auto my_state = base::this_thread::state<thread_state>();
   const auto my_scheduler = my_state->scheduler_;
@@ -44,6 +47,7 @@ bool abstract_task::steal_work() {
         if (internal_stealing(current_task)) {
           // internal steal was a success, hand it back to the internal scheduler
           target_state->lock_.reader_unlock();
+          backoff.reset();
           return true;
         }
 
@@ -61,6 +65,7 @@ bool abstract_task::steal_work() {
       auto lock = &target_state->lock_;
       if (current_task->split_task(lock)) {
         // top level steal was a success (we did a top level task steal)
+        backoff.reset();
         return false;
       }
 
@@ -71,6 +76,7 @@ bool abstract_task::steal_work() {
   }
 
   // internal steal was no success
+  backoff.do_backoff();
   return false;
 }