1 files changed, 26 insertions, 20 deletions
diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx
index 5df470f..0b57208 100644
--- a/bbot/agent/agent.cxx
+++ b/bbot/agent/agent.cxx
@@ -1076,7 +1076,7 @@ enumerate_machines (const dir_path& machines)
 {
   tracer trace ("enumerate_machines", machines.string ().c_str ());
 
-  size_t dir_iter_retry (0); // Directory iteration retry (see below).
+  size_t dir_iter_retries (0); // Directory iteration retry count (see below).
 
   for (;;) // From-scratch retry loop for after bootstrap (see below).
   {
@@ -1152,6 +1152,7 @@ enumerate_machines (const dir_path& machines)
     //
     try
     {
+      bool dir_iter_retry (false);
       for (const dir_entry& ve:
              dir_iterator (machines, dir_iterator::no_follow))
       {
@@ -1426,31 +1427,36 @@ enumerate_machines (const dir_path& machines)
         }
         catch (const system_error& e)
         {
-          fail << "unable to iterate over " << vd << ": " << e;
+          // Once in a while we get ENOENT while iterating over the machines
+          // volume directory. This directory contains the machine directories
+          // (not btrfs subvolumes) and is not being changed when we get this
+          // error. Maybe this is due to directory sizes/timestamps changes,
+          // but then we would expect to get this error a lot more often..? So
+          // this feels like a btrfs bug which we are going to retry a few
+          // times. See GH issue #349 for additional information.
+          //
+          dir_iter_retry = (dir_iter_retries++ != 3);
+
+          (dir_iter_retry
+           ? warn
+           : error) << "unable to iterate over " << vd << ": " << e;
+
+          if (dir_iter_retry)
+            break;
+          else
+            throw failed ();
         }
       } // Outer dir_iterator loop.
-    }
-    catch (const system_error& e)
-    {
-      // Once in a while we get ENOENT while iterating over the machines
-      // directory. This directory contains the machine directories (not
-      // subvolumes) and is not being changed when we get this error. Maybe
-      // this is due to directory sizes/timestamps changes, but then we would
-      // expect to get this error a lot more often..? So this feels like a
-      // btrfs bug which we are going to retry a few times. See GH issue #349
-      // for additional information.
-      //
-      bool retry (dir_iter_retry++ != 2);
 
-      (retry ? warn : error) << "unable to iterate over " << machines
-                             << ": " << e;
-      if (retry)
+      if (dir_iter_retry)
         continue; // Re-enumerate from scratch.
       else
-        throw failed ();
+        dir_iter_retries = 0; // Reset for re-enumeration due to other reasons.
+    }
+    catch (const system_error& e)
+    {
+      fail << "unable to iterate over " << machines << ": " << e;
     }
-
-    dir_iter_retry = 0; // Reset for re-enumeration due to other reasons.
 
     // See if there is a pending bootstrap and whether we can perform it.
     //